Example #1
def query():
    # n_initial = 100
    # X, y = load_digits(return_X_y=True)
    # X_train, X_test, y_train, y_test = train_test_split(X, y)
    #
    # initial_idx = np.random.choice(range(len(X_train)), size=n_initial, replace=False)
    #
    # X_initial, y_initial = X_train[initial_idx], y_train[initial_idx]
    # X_pool, y_pool = np.delete(X_train, initial_idx, axis=0), np.delete(y_train, initial_idx, axis=0)
    strategy = None
    classifier = None

    file = request.files['file']
    # if the user does not select a file, the browser may also
    # submit an empty part without a filename
    filename = secure_filename(file.filename)

    # shutil.rmtree(os.path.join(app.config['UPLOAD_FOLDER'],filename.split(".")[0]))
    if file and allowed_file(file.filename):
        filename = secure_filename(file.filename)
        file.save(os.path.join(UPLOAD_FOLDER, filename))
        if(filename.split(".")[1]=="rar"):
            patoolib.extract_archive(os.path.join(UPLOAD_FOLDER, filename), outdir=os.path.join(UPLOAD_FOLDER))
        else:
            zip_ref = zipfile.ZipFile(os.path.join(UPLOAD_FOLDER, filename), 'r')
            zip_ref.extractall(UPLOAD_FOLDER)
            zip_ref.close()
            print("Succesfull")

    st = request.form.get('strategy_select')
    cl = request.form.get('classifier_select')
    option = int(request.form.get('structure_select'))
    print(cl)
    if(str(cl)=='Random Forest'):
        classifier = RandomForestClassifier()
    elif(str(cl)=='KNN'):
        classifier = KNeighborsClassifier()
    else:
        classifier = DecisionTreeClassifier()

    n_queries = request.form['queries']

    print(st)
    classlist =[]
    classes = {}
    data = {}
    data['image'] = []
    data['label'] = []
    filename = secure_filename(file.filename)
    print(filename)
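    # structure_select: option 0 assumes one sub-folder per class (the folder
    # name becomes the label); any other option assumes a flat folder where the
    # label is taken from the alphabetic part of each image's file name.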
    if option == 0:
        for dirname, _, filenames in os.walk(os.path.join(UPLOAD_FOLDER,filename.split(".")[0])):
            print(filenames)
            for filename in filenames:
                if('.jpg' in filename or '.jpeg' in filename or '.png' in filename):
                    image = Image.open(os.path.join(dirname, filename))
                    image = image.resize((200,200), Image.ANTIALIAS)
                    size = np.array(image).size
                    if(len(classes)==0):
                        data['image'] = np.array(image).reshape((1,size))
                    else:
                        try:
                            x = np.array(image).reshape((1,size))
                            data['image'] = np.append(data['image'],x,axis=0)
                        except:
                            continue
                    class_name = dirname.split('\\')[-1]
                    if(class_name not in classes.keys()):
                        classlist.append({'name':class_name,'number':len(classes)})
                        classes[class_name] = len(classes)
                        #print(os.path.join(dirname, filename))
                        #print(dirname)

                    data['label'].append(classes[class_name])
                    print(classes)
    else:
        for imfile in os.listdir(os.path.join(UPLOAD_FOLDER,filename.split(".")[0])):
            if imfile.endswith(".jpg") or imfile.endswith(".jpeg") or imfile.endswith("png"):
                image = Image.open(os.path.join(os.path.join(UPLOAD_FOLDER,filename.split(".")[0]), imfile))
                image = image.resize((200,200), Image.ANTIALIAS)
                size = np.array(image).size

                if(len(classes)==0):
                    data['image'] = np.array(image).reshape((1,size))
                else:
                    try:
                        x = np.array(image).reshape((1,size))
                        data['image'] = np.append(data['image'],x,axis=0)
                    except:
                        continue
                if(("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0]))) not in classes.keys()):
                    classlist.append({'name':("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0]))),'number':len(classes)})
                    classes[("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0])))] = len(classes)
                data['label'].append(classes[("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0])))])
                print(classes)
            else:
                continue

    X = data['image']
    y = data['label']
    n_initial = 100
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    initial_idx = np.random.choice(range(len(X_train)), size=n_initial, replace=False)
    X_initial=[]
    y_initial = []
    print(type(X_initial))
    for i in range(n_initial):

        v = np.array(X_train[initial_idx[i]]).reshape((1,size))

        #print(v.shape)
        y_initial.append(y_train[initial_idx[i]])
        if(i==0):
            X_initial = np.array(X_train[initial_idx[i]]).reshape((1,size))

            print(X_initial.shape)
        else:
            X_initial = np.append(X_initial,v,axis=0)
        #print("X Shape",X_initial.shape)
        #     X_initial = X_initial.append(X_train[initial_idx[i]])
    X_pool, y_pool = np.delete(X_train, initial_idx, axis=0), np.delete(y_train, initial_idx, axis=0)
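    # X_pool / y_pool are the remaining unlabelled pool: the training split with
    # the randomly chosen initial samples removed.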
    print(X.shape)
    print(X[0].shape)
    print(X_initial.shape)

    params = {}
    params["X_test"] = X_test
    params["y_test"] = y_test
    params["counter"] = n_queries
    params["X_pool"] = X_pool
    params["y_pool"] = y_pool
    if(str(st)=='Uncertainty Sampling'):

        print(classifier)
        print(cl)
        learner = ActiveLearner(
            estimator=classifier,
            query_strategy=uncertainty_sampling,
            X_training=X_initial, y_training=y_initial
        )

        params["learner"] = learner
        accuracy_scores = learner.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,learner,None,accuracy,X_test,y_test,classlist,n_queries)
        print("Calling Helper")
        return helper()
    elif(str(st)=='Entropy Sampling'):

        print(classifier)
        print(cl)
        learner = ActiveLearner(
            estimator=classifier,
            query_strategy=entropy_sampling,
            X_training=X_initial, y_training=y_initial
        )

        params["learner"] = learner
        accuracy_scores = learner.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,learner,None,accuracy,X_test,y_test,classlist,n_queries)
        return helper()
    elif(str(st)=='Random Sampling'):
        learner = ActiveLearner(
            estimator=classifier,
            query_strategy=random_sampling,
            X_training=X_train, y_training=y_train
        )
        accuracy_scores = learner.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,learner,None,accuracy,X_test,y_test,classlist,n_queries)
        return helper()
    elif(str(st)=='Query By Committee(Vote Entropy Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=vote_entropy_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries)
        return helper()

    elif(str(st)=='Query By Committee(Uncertainty Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=uncertainty_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries)
        return helper()

    elif(str(st)=='Query By Committee(Max Disagreement Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=max_disagreement_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries)
        return helper()

    elif(str(st)=='Query By Committee(Max STD Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=max_std_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries)
        return helper()

    elif(str(st)=='Query By Committee(Consensus Entropy Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=consensus_entropy_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries)
        return helper()
Example #2
def parse_parser_results_new(text):
    """ This is the nasty bit of code to interact with the command-line
    interface of the CoreNLP tools.  Takes a string of the parser results
    and then returns a Python list of Data objects, one for each parsed
    sentence.

    updated for newer version of stanford corenlp -- 2015
    """
    data_list = []
    data = None
    lastline = None
    following_line = None
    state = STATE_START
    #for line in re.split("\r\n(?![^\[]*\])",text):
    seqs = re.split("\r\n", text)
    i = 0

    #for line in re.split("\r\n", text):
    while i < len(seqs):
        line = seqs[i]
        line = line.strip()

        if line.startswith('NLP>'):  # end
            if data: data_list.append(data)  # add last one
            break
        if line.startswith("Sentence #"):
            if data: data_list.append(data)
            data = Data()
            if SENTENCE_NO_PATTERN.match(line):
                state = STATE_TEXT
            else:
                lastline = line
                state = STATE_SENT_ERROR
            i += 1

        elif state == STATE_SENT_ERROR:
            line = lastline + line
            assert SENTENCE_NO_PATTERN.match(line) is not None
            state = STATE_TEXT
            i += 1

        elif state == STATE_TEXT_ERROR:
            line = line + following_line
            data.addText(line)
            state = STATE_WORDS
            i += 2

        elif state == STATE_TEXT:
            Data.newSen()
            data.addText(line)
            state = STATE_WORDS
            i += 1

        elif state == STATE_WORDS:
            if len(line) == 0:
                i += 1
                continue
            if not line.startswith("[Text="):
                #raise Exception('Parse error. Could not find "[Text=" in: %s' % line)
                print >> sys.stderr, 'Parse error. Could not find "[Text=" in: %s' % line
                print >> sys.stderr, 'Attempting to fix the error.'
                following_line = line
                state = STATE_TEXT_ERROR
                i -= 1
                continue

            #for s in WORD_PATTERN.findall(line):
            wline = line
            while WORD_PATTERN.match(wline):
                t = parse_bracketed(wline[1:-1])
                if t[0] == '':
                    i += 1
                    wline = seqs[i]
                    continue
                data.addToken(t[0], t[1][u'CharacterOffsetBegin'],
                              t[1][u'CharacterOffsetEnd'], t[1][u'Lemma'],
                              t[1][u'PartOfSpeech'], t[1][u'NamedEntityTag'])
                i += 1
                wline = seqs[i]

            if WORD_ERROR_PATTERN.match(wline):  # handle format error
                wline = wline + seqs[i + 1]
                wline = wline.strip()
                t = parse_bracketed(wline[1:-1])
                data.addToken(t[0], t[1][u'CharacterOffsetBegin'],
                              t[1][u'CharacterOffsetEnd'], t[1][u'Lemma'],
                              t[1][u'PartOfSpeech'], t[1][u'NamedEntityTag'])
                i += 2
                state = STATE_WORDS
                continue
            state = STATE_TREE
            parsed = []

        elif state == STATE_TREE:
            if len(line) == 0:
                state = STATE_DEPENDENCY
                parsed = " ".join(parsed)
                i += 1
                #data.addTree(Tree.parse(parsed))
            else:
                parsed.append(line)
                i += 1

        elif state == STATE_DEPENDENCY:
            if len(line) == 0:
                state = STATE_COREFERENCE
            else:
                pass
                '''
                # don't need here
                split_entry = re.split("\(|, ", line[:-1])
                if len(split_entry) == 3:
                    rel, l_lemma, r_lemma = split_entry
                    m = re.match(r'(?P<lemma>.+)-(?P<index>[^-]+)', l_lemma)
                    l_lemma, l_index = m.group('lemma'), m.group('index')
                    m = re.match(r'(?P<lemma>.+)-(?P<index>[^-]+)', r_lemma)
                    r_lemma, r_index = m.group('lemma'), m.group('index')

                    data.addDependency( rel, l_lemma, r_lemma, l_index, r_index)
                '''

            i += 1
        elif state == STATE_COREFERENCE:
            if "Coreference set" in line:
                #if 'coref' not in results:
                #    results['coref'] = []
                coref_set = []
                data.addCoref(coref_set)
            else:
                for src_i, src_pos, src_l, src_r, sink_i, sink_pos, sink_l, sink_r, src_word, sink_word in CR_PATTERN.findall(
                        line):
                    src_i, src_pos, src_l, src_r = int(src_i), int(
                        src_pos), int(src_l), int(src_r)
                    sink_i, sink_pos, sink_l, sink_r = int(sink_i), int(
                        sink_pos), int(sink_l), int(sink_r)
                    coref_set.append(
                        ((src_word, src_i, src_pos, src_l, src_r),
                         (sink_word, sink_i, sink_pos, sink_l, sink_r)))

            i += 1
        else:
            i += 1

    return data_list
Example #3
# -*- coding: utf-8 -*-

from sys import path
path.append('./data')
path.append('./methods')

from data import Data
from svm import SVM
from ann import ANN
from nb import NaiveBayes
from time import time
import graph

data = Data("mushrooms.csv")
method = ANN(data)
i = time()
method.train()
method.predict()
tempo = time() - i
result = method.getPercentage()
print 'Time (s):', tempo
print 'Accuracy:', result
print ''

data = Data("mushrooms.csv")
method = SVM(data)
i = time()
method.train()
method.predict()
tempo = time() - i
result = method.getPercentage()
Example #4
        complete_data = []
        with open(self.path, encoding='cp932', errors='ignore') as f:
            reader = csv.DictReader(f)
            for line in reader:
                context = [
                    line['InputSentence1'], line['InputSentence2'],
                    line['InputSentence3'], line['InputSentence4']
                ]
                option_0 = line['RandomFifthSentenceQuiz1']
                option_1 = line['RandomFifthSentenceQuiz2']
                label = int(line['AnswerRightEnding']) - 1
                complete_data.append({
                    'context': context,
                    'options': [option_0, option_1],
                    'label': label
                })
        return complete_data


if __name__ == '__main__':

    from data import Data
    train_data = Data('data/train2017.csv').get_train_data()
    print(train_data[-1], len(train_data))
    valid_2016_data = Data('data/valid2016.csv').get_validtest_data()
    valid_2018_data = Data('data/valid2018.csv').get_validtest_data()
    test_data = Data('data/test2016.csv').get_validtest_data()
    print(valid_2016_data[-1], len(valid_2016_data))
    print(valid_2018_data[-1], len(valid_2018_data))
    print(test_data[-1], len(test_data))
def gen_data():
    offset = csts.HEIGHT / (csts.LEN + 1)
    array = [Data(i * offset) for i in range(1, csts.LEN + 1)]
    random.shuffle(array)
    return array
            for j in range(5):
                combineFoldName = combineFoldsNames[j]
                singleFoldName = singleFoldNames[j]
                _indexCollection = []
                _root = []
                X_train, y_train, num_features = read_libsvm(combineFoldName)
                x = X_train.todense()
                for y in range(size):
                    _index = []
                    for z in range(50):
                        _index.append(random.randint(0, 255))
                    formatFile(x, y_train, _index, 799)
                    trainData = np.loadtxt('fileFormated',
                                           delimiter=',',
                                           dtype=str)
                    trainData_obj = Data(data=trainData)
                    attributesSet = trainData_obj.attributes
                    root = id3Depth(attributesSet, trainData_obj, 1)
                    _root.append(root)
                    _indexCollection.append(_index)

                _predictions = []
                full = True
                X_train, y_train, num_features = read_libsvm(singleFoldName)
                x = X_train.todense()
                for z in range(size):
                    formatFile(x, y_train, _indexCollection[z], 867598)
                    trainData = np.loadtxt('fileFormated',
                                           delimiter=',',
                                           dtype=str)
                    trainData_obj = Data(data=trainData)
Example #7
def getCashFlow(strid):

    data = Data()
    #--------- Plot the cash flow statement
    length_cashFlow = len(data.dates['cashFlowsSheet'])
    cashFlowOperating = data.get('營業活動之淨現金流入(流出)', length_cashFlow)[strid].to_frame()
    cashFlowOperating = cashFlowOperating.reset_index()
    cashFlowOperating['date'] = cashFlowOperating['date'].apply(modifMonthtoSeason)
    copyCashFlow = cashFlowOperating.copy()
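    # Cash-flow figures in the statement are cumulative within the year, so the
    # loop below subtracts the previous cumulative value to recover each
    # quarter's own operating cash flow.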
    for i in range(1, len(cashFlowOperating)):
        cashFlowOperating.at[i, strid] = cashFlowOperating.at[i, strid] - copyCashFlow.at[i-1, strid]
    
    cashFlowOperating.set_index('date', inplace=True)

    # Investing cash flow
    length_investFlow = len(data.dates['cashFlowsSheet'])
    investFlowOperating = data.get('投資活動之淨現金流入(流出)', length_investFlow)[strid].to_frame()
    investFlowOperating = investFlowOperating.reset_index()
    investFlowOperating['date'] = investFlowOperating['date'].apply(modifMonthtoSeason)
    copyInvestFlow = investFlowOperating.copy()
    for i in range(1, len(investFlowOperating)):
        investFlowOperating.at[i, strid] = investFlowOperating.at[i, strid] - copyInvestFlow.at[i-1, strid]
    
    investFlowOperating.set_index('date', inplace=True)
    # End investing cash flow

    # Financing cash flow
    length_funddraseFlow = len(data.dates['cashFlowsSheet'])
    fundraseFlowOperating = data.get('籌資活動之淨現金流入(流出)', length_funddraseFlow)[strid].to_frame()
    fundraseFlowOperating = fundraseFlowOperating.reset_index()
    fundraseFlowOperating['date'] = fundraseFlowOperating['date'].apply(modifMonthtoSeason)
    copyfundraseFlow = fundraseFlowOperating.copy()
    for i in range(1, len(fundraseFlowOperating)):
        fundraseFlowOperating.at[i, strid] = fundraseFlowOperating.at[i, strid] - copyfundraseFlow.at[i-1, strid]
    
    fundraseFlowOperating.set_index('date', inplace=True)
    # End financing cash flow

    # Net cash flow
    # Net cash flow = operating cash flow - investing cash flow + financing cash flow
    netCashFlow = cashFlowOperating.copy()
    re_netCash = netCashFlow.reset_index()
    re_cash = cashFlowOperating.reset_index()
    re_invest = investFlowOperating.reset_index()
    re_fund = fundraseFlowOperating.reset_index()
    for i in range(0, len(cashFlowOperating)):
        if re_invest.at[i, strid] < 0:
            re_netCash.at[i, strid] = re_cash.at[i, strid] + re_invest.at[i, strid]
        else:
            re_netCash.at[i, strid] = re_cash.at[i, strid] - re_invest.at[i, strid]
    re_netCash.set_index('date', inplace=True)
    re_netCash = re_netCash.add(fundraseFlowOperating, axis=0)
    # End net cash flow

    cashFlowOperatingfig = [{
        'x' : cashFlowOperating.index,
        'y' : cashFlowOperating[strid],
        'type':'line',
        'name':'營業現金流',
        'hovertemplate': "%{x}營業現金流= %{y:$,}"
    },{
        'x': investFlowOperating.index,
        'y':investFlowOperating[strid],
        'type':'line',
        'name':'投資現金流',
        'hovertemplate': "%{x}投資現金流= %{y:$,}"
    },{
        'x': fundraseFlowOperating.index,
        'y':fundraseFlowOperating[strid],
        'type':'line',
        'name':'籌資現金流',
        'hovertemplate': "%{x}籌資現金流= %{y:$,}"
    },{
        'x': re_netCash.index,
        'y':re_netCash[strid],
        'type':'line',
        'name':'淨現金流',
        'hovertemplate': "%{x}淨現金流= %{y:$,}"
    },
    ]
    #--------- End plotting the cash flow statement

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=(cashFlowOperatingfig[0])['x'], y=(cashFlowOperatingfig[0])['y'], name='營業現金流', hovertemplate='%{x}營業現金流= %{y:$,}'))
    fig.add_trace(go.Scatter(x=(cashFlowOperatingfig[1])['x'], y=(cashFlowOperatingfig[1])['y'], name='投資現金流', hovertemplate="%{x}投資現金流= %{y:$,}"))
    fig.add_trace(go.Scatter(x=(cashFlowOperatingfig[2])['x'], y=(cashFlowOperatingfig[2])['y'], name='籌資現金流', hovertemplate='%{x}籌資現金流= %{y:$,}'))
    fig.add_trace(go.Scatter(x=(cashFlowOperatingfig[3])['x'], y=(cashFlowOperatingfig[3])['y'], name='淨現金流', hovertemplate='%{x}淨現金流= %{y:$,}'))
    fig.update_layout(
        plot_bgcolor = '#36404A',
        paper_bgcolor = '#36404A',
        font_color = '#7FDBFF',
        xaxis = {'title':'季度'},
        yaxis = {'title':'千元'}
    )
    return fig
Example #8
st.write('')
st.subheader('2.1 Peek at the raw data')

st.write('**The first 5 rows of the raw data:**')
st.write(data_df.head())
st.write('')
st.write('**Some basic statistics:**')
st.write('Number of data points =', len(data_df.index))
st.write('Number of features =', len(data_df.columns) - 1)

################################################################################

st.write('')
st.subheader('2.2 Exploration & Processing')

data = Data(data_df, sensitive_features, target_feature, pos_target)
data_df = data.data_df
bias_cols = data.bias_cols
target_col = data.target_col
feature_cols = data.feature_cols
bias_col_types = data.bias_col_types
categories = data.categories

# Save our processed data
data_df.to_csv('../data/processed/' + filename, index=False)
write_params_to_file(bias_cols, target_col, bias_col_types, categories)

################################################################################

st.write('')
st.subheader('2.3 Post-processing exploration')
        print "\nclass probs:"
        for c in self.clssprobs:
            print c, self.clssprobs[c]
        print "\nattr probs:"
        for c in self.clssprobs:
            print "\nclass", c, ":"
            for a in sorted(self.attrcnts):
                print a, self.condprobs[a, c]

if __name__ == "__main__":

    from confmat import ConfMat

    filename = "ds/weatherNominalTr.txt"

    d = Data(filename)

    ##    d.report()

    pr = MaxAPost(d)
    pr.train()
    cm = ConfMat(pr.clsscnts)
    ##    print
    for (v, c_true) in d.test_set:
        c_pred = pr.predict(v)[0]
        ##        print v, c_pred, "( true class:", c_true, ")"
        cm.mat[c_pred, c_true] += 1
    print
    ##    pr.show()
    ##    print
    cm.report()
args = parser.parse_args()

# set arguments
l_r = args.l_r
batch_size = args.batch_size
pickle_dir = args.pickle_dir
max_seq = args.max_seq
epochs = args.epochs
is_reuse = args.is_reuse
load_path = args.load_path
save_path = args.save_path
multi_gpu = args.multi_gpu

# load data
dataset = Data('dataset/processed')
print(dataset)

# load model
learning_rate = callback.CustomSchedule(par.embedding_dim)
opt = Adam(l_r, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

# define model
mt = MusicTransformer(embedding_dim=256,
                      vocab_size=par.vocab_size,
                      num_layer=6,
                      max_seq=max_seq,
                      dropout=0.2,
                      debug=False,
                      loader_path=load_path)
mt.compile(optimizer=opt, loss=callback.transformer_dist_train_loss)
Example #11
from visualisation import plt_scores_lambs
from visualisation import plt_pred_obser
from visualisation import plt_residu_lambs
from visualisation import plt_scores_datasize
from visualisation import plt_square_lambs
from visualisation import plt_coefs_coefs
from sklearn.decomposition import PCA
from model_selection import _k_split
from model_selection import train_test_split  # used below; presumably from the same local module
from visualisation import create_plots
import time
from data import Data

# Paste the dataset location below
location = 'testdata_rgb.txt'

d = Data()
# X, y = d.boston()
X, y = d.Residuals_Match_DMhydro_Less_z()

X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)

method = None
while method == None:
    l = input(
        'What method do you want to use ?\n a  Batch Gradient Descent\n b  Stochastic Gradient descent\n c  '
        'Mini-batch '
        'Gradient Descent\n d  Ordinary Least Squares Solution\n e  Coordinate Descent\n f Accelerated Proximal Gradient Descent\n g  Alternating Direction Method of Multipliers '
    )
    if l == "a":
        method = "bgd"
    elif l == "b":
from data import Data
from subset import PrivateDomain
from model import sess_runner

#main_dataset = Data(log=False,
#	filename='../data/3732_filtered.txt',
#	batch_size=50,
#	sep=' ')

main_dataset = Data(filename='../data/3732_filtered.txt',
                    split=True,
                    split_start=800,
                    #additional_info='../data/gse80655_annotation.txt',
                    batch_size=50,
                    sep=' ',
                    log=False)

supporting_dataset = Data(filename='../data/3732_filtered.txt',
        split=True,
        split_end=800,
        ind=1,
        #additional_info='../data/gse80655_annotation.txt',
        batch_size=50,
        sep=' ',
        log=False)


def runner():
	model = [PrivateDomain(main_dataset, delay=1, tagged=False)]

	model.append(PrivateDomain(supporting_dataset, 
Example #13
if __name__ == "__main__":

    from naivebayes import NaiveBayes
    from data import Data

    print_numbers = False

    datafile = "ds/titanicTr.txt"
    pos_class = "Survived:Yes"
    #pos_class = "Survived:No"

    # datafile = "cmcTr.txt"
    # pos_class = "contraceptive-method:none"

    d = Data(datafile)

    prnb = NaiveBayes(d)
    prnb.train()

    r = Roc(prnb, pos_class)

    r.do_curve()

    print "Predicting", pos_class, "for data file", datafile,
    print "with", int(r.curve[2]), "positive instances and", int(
        r.curve[3]), "negative instances"

    if print_numbers:
        prnb.show()
Example #14
    def saveStats(self):
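        # For every clan in ClanList: fetch each member's stats from the API,
        # write one CSV per player, then append the clan-wide averages to a
        # per-clan CSV named after the current GMT time.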
        dt = Data()
        api = API()
        ut = Util()
        stats = Stats()

        curtime = ut.getGMTTime()
        clanlist = dt.read('', 'ClanList')

        for clan in clanlist:
            players = api.getClanMembers(api.getClanID(clan[0]))

            clanID = api.getClanID(clan[0])
            clanname = api.getClanTag(clanID)
            playernum = len(players)
            clanavgpr = 0.0
            clanavgbt = 0
            clanavgdmg = 0.0
            clanavgkills = 0.0
            clanavgwr = 0.0
            clanavgspd = 0.0
            clanavgptd = 0.0

            data2 = []
            data2.append([int(clanID)])
            data2.append([clanname])

            if players is not None:
                for player in players:

                    data = []

                    name = api.getPlayerName(player)
                    pr = stats.PRcalculate(player)

                    bt = api.getPlayerBattles(player)
                    if (bt == 0):
                        break

                    avgdmg = api.getPlayerAvgDmg(player)
                    avgwr = api.getPlayerAvgWR(player)
                    avgkills = api.getPlayerAvgKills(player)
                    avgspdmg = api.getPlayerAvgSpottingDmg(player)
                    avgptdmg = api.getPlayerAvgPotentialDmg(player)
                    # calculate avg dmg, wr,kills,

                    data.append([name])
                    data.append([player])
                    data.append([pr])
                    data.append([bt])
                    data.append([avgdmg])
                    data.append([avgkills])
                    data.append([avgwr])
                    data.append([avgspdmg])
                    data.append([avgptdmg])

                    clanavgpr += pr
                    clanavgbt += bt
                    clanavgdmg += avgdmg
                    clanavgkills += avgkills
                    clanavgwr += avgwr
                    clanavgspd += avgspdmg
                    clanavgptd += avgptdmg

                    temppath = str(clan[0]) + "/" + str(name)
                    filename = str(curtime) + ".csv"

                    print(temppath + " " + filename)
                    print(data)
                    dt.write(temppath, filename, data)

            data2.append([float(clanavgpr / playernum)])
            data2.append([int(clanavgbt / playernum)])
            data2.append([float(clanavgdmg / playernum)])
            data2.append([float(clanavgkills / playernum)])
            data2.append([float(clanavgwr / playernum)])
            data2.append([float(clanavgspd / playernum)])
            data2.append([float(clanavgptd / playernum)])

            dt.write(str(clan[0]), str(curtime) + ".csv", data2)

        return 0
Example #15
"""
Stores some code produced during testing
# @Author   : Tian Xiao
"""
from LassoRegression import LassoRegression
from data import Data
import numpy as np
from model_selection import train_test_split

X, y = Data().multi_data_boston()


def poly_test(X, y, degree=1):
    X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)
    poly_reg = LassoRegression(degree=degree)
    poly_reg.fit(X_train, y_train, lasso=False, method="normal")
    print(poly_reg.score(X_test, y_test))
    X_test = X_test[:5]
    y_predict = poly_reg.predict(X_test)
    y_true = y_test[:5]
    for i in range(len(y_true)):
        print(y_true[i], y_predict[i])
    print()


def lasso_test(X, y, degree=1):
    X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)
    lasso_reg = LassoRegression(degree=degree)
    lasso_reg.fit(X_train, y_train, lasso=True, method="bgd")
    print(lasso_reg.score(X_test, y_test))
    X_test = X_test[:5]
Example #16
def main(out_file='output/result.json', model_config='config/rnn_config.json'):
    """Test model for given test set on 1 GPU or CPU.

    Args:
        out_file: output file
        model_config: config file
    """
    # 0. Load config
    with open(model_config) as fin:
        config = json.load(fin, object_hook=lambda d: SimpleNamespace(**d))
    if torch.cuda.is_available():
        device = torch.device('cuda')
        # device = torch.device('cpu')
    else:
        device = torch.device('cpu')

    #0. preprocess file
    # id_list = []
    # with open(in_file, 'r', encoding='utf-8') as fin:
    #     for line in fin:
    #         sents = json.loads(line.strip())
    #         id = sents['id']
    #         id_list.append(id)
    # id_dict = dict(zip(range(len(id_list)), id_list))

    # 1. Load data
    data = Data(vocab_file=os.path.join(config.model_path, 'vocab.txt'),
                max_seq_len=config.max_seq_len,
                model_type=config.model_type,
                config=config)
    test_set, sc_list, label_list = data.load_file(config.test_file_path,
                                                   train=False)

    token_list = []
    for line in sc_list:
        tokens = data.tokenizer.convert_ids_to_tokens(line)
        token_list.append(tokens)

    data_loader_test = DataLoader(test_set,
                                  batch_size=config.batch_size,
                                  shuffle=False)
    # 2. Load model
    model = MODEL_MAP[config.model_type](config)
    model = load_torch_model(model,
                             model_path=os.path.join(config.model_path,
                                                     'model.bin'))
    model.to(device)
    # 3. Evaluate
    answer_list, length_list = evaluate(model,
                                        data_loader_test,
                                        device,
                                        isTest=True)

    def flatten(ll):
        return list(itertools.chain(*ll))

    # train_answers = handy_tool(label_list, length_list) #gold
    # #answer_list = handy_tool(answer_list, length_list) #prediction
    # train_answers = flatten(train_answers)
    # train_predictions = flatten(answer_list)
    #
    # train_acc, train_f1 = calculate_accuracy_f1(
    #     train_answers, train_predictions)
    # print(train_acc, train_f1)
    test_json = json.load(open(config.test_file_path, 'r', encoding='utf-8'))
    id_list = [item['id'] for item in test_json]

    mod_tokens_list = handy_tool(token_list, length_list)
    result = [
        result_to_json(t, s) for t, s in zip(mod_tokens_list, answer_list)
    ]

    # 4. Write answers to file
    with open(out_file, 'w', encoding='utf8') as fout:
        result_list = []
        for id, item in zip(id_list, result):
            entities = item['entities']
            words = [
                d['word'] + "-" + d['type'] for d in entities
                if d['type'] != 's'
            ]
            unique_words = []
            for w in words:
                if w not in unique_words:
                    unique_words.append(w)
            item = {}
            item['id'] = id
            item['entities'] = unique_words
            result_list.append(item)
        json.dump(result_list, fout, ensure_ascii=False, indent=4)
Example #17
def infer(flowtron_path, waveglow_path, text, speaker_id, n_frames, sigma,
          seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # load waveglow
    waveglow = torch.load(waveglow_path)['model'].cuda().eval()
    waveglow.cuda().half()
    for k in waveglow.convinv:
        k.float()
    waveglow.eval()

    # load flowtron
    model = Flowtron(**model_config).cuda()
    cpt_dict = torch.load(flowtron_path)
    if 'model' in cpt_dict:
        dummy_dict = cpt_dict['model'].state_dict()
    else:
        dummy_dict = cpt_dict['state_dict']
    model.load_state_dict(dummy_dict)
    model.eval()

    print("Loaded checkpoint '{}')".format(flowtron_path))

    ignore_keys = ['training_files', 'validation_files']
    trainset = Data(
        data_config['training_files'],
        **dict((k, v) for k, v in data_config.items() if k not in ignore_keys))

    tic_prep = time.time()

    str_text = text
    num_char = len(str_text)
    num_word = len(str_text.split())

    speaker_vecs = trainset.get_speaker_id(speaker_id).cuda()
    text = trainset.get_text(text).cuda()

    speaker_vecs = speaker_vecs[None]
    text = text[None]
    toc_prep = time.time()

    ############## warm up   ########### to measure exact flowtron inference time

    with torch.no_grad():
        tic_warmup = time.time()
        residual = torch.cuda.FloatTensor(1, 80, n_frames).normal_() * sigma
        mels, attentions = model.infer(residual, speaker_vecs, text)
        toc_warmup = time.time()

    tic_flowtron = time.time()
    with torch.no_grad():  #,torch.autograd.profiler.emit_nvtx(): ########### prof.
        tic_residual = time.time()
        residual = torch.cuda.FloatTensor(1, 80, n_frames).normal_() * sigma
        toc_residual = time.time()
        # profiler.start()  ########### prof.
        mels, attentions = model.infer(residual, speaker_vecs, text)
        # profiler.stop()    ########### prof.
        toc_flowtron = time.time()

    for k in range(len(attentions)):
        attention = torch.cat(attentions[k]).cpu().numpy()
        fig, axes = plt.subplots(1, 2, figsize=(16, 4))
        axes[0].imshow(mels[0].cpu().numpy(), origin='bottom', aspect='auto')
        axes[1].imshow(attention[:, 0].transpose(),
                       origin='bottom',
                       aspect='auto')
        fig.savefig('sid{}_sigma{}_attnlayer{}.png'.format(
            speaker_id, sigma, k))
        plt.close("all")

    tic_waveglow = time.time()
    audio = waveglow.infer(mels.half(), sigma=0.8).float()
    toc_waveglow = time.time()

    audio = audio.cpu().numpy()[0]
    # normalize audio for now
    audio = audio / np.abs(audio).max()

    len_audio = len(audio)
    dur_audio = len_audio / 22050
    num_frames = int(len_audio / 256)

    dur_prep = toc_prep - tic_prep
    dur_residual = toc_residual - tic_residual
    dur_flowtron_in = toc_flowtron - toc_residual
    dur_warmup = toc_warmup - tic_warmup
    dur_flowtron_out = toc_flowtron - tic_residual
    dur_waveglow = toc_waveglow - tic_waveglow
    dur_total = dur_prep + dur_flowtron_out + dur_waveglow

    RTF = dur_audio / dur_total

    str_text = "\n text : " + str_text
    str_num = "\n text {:d} char {:d} words  ".format(num_char, num_word)
    str_audio = "\n generated audio : {:2.3f} samples  {:2.3f} sec  with  {:d} mel frames ".format(
        len_audio, dur_audio, num_frames)
    str_perf = "\n total time {:2.3f} = text prep {:2.3f} + flowtron{:2.3f} + wg {:2.3f}  ".format(
        dur_total, dur_prep, dur_flowtron_out, dur_waveglow)
    str_flow = "\n total flowtron {:2.3f} = residual cal {:2.3f} + flowtron {:2.3f}  ".format(
        dur_flowtron_out, dur_residual, dur_flowtron_in)
    str_rtf = "\n RTF is {:2.3f} x  with warm up {:2.3f} ".format(
        RTF, dur_warmup)

    print(str_text, str_num, str_audio, str_perf, str_flow, str_rtf)

    write("sid{}_sigma{}.wav".format(speaker_id, sigma),
          data_config['sampling_rate'], audio)
Example #18
def main():
    parser = argparse.ArgumentParser(description="Experiment setup")
    # misc
    parser.add_argument('--seed', default=33, type=int)
    parser.add_argument('--gpu', default="", type=str)
    parser.add_argument('--no_train', default=False, action="store_true")
    parser.add_argument('--from_model_ckpt', default=None, type=str)
    parser.add_argument('--no_rules', default=False, action="store_true")
    parser.add_argument('--rule_thr', default=1e-2, type=float)
    parser.add_argument('--no_preds', default=False, action="store_true")
    parser.add_argument('--get_vocab_embed',
                        default=False,
                        action="store_true")
    parser.add_argument('--exps_dir', default=None, type=str)
    parser.add_argument('--exp_name', default=None, type=str)
    # data property
    parser.add_argument('--datadir', default=None, type=str)
    parser.add_argument('--resplit', default=False, action="store_true")
    parser.add_argument('--no_link_percent', default=0., type=float)
    parser.add_argument('--type_check', default=False, action="store_true")
    parser.add_argument('--domain_size', default=128, type=int)
    parser.add_argument('--no_extra_facts', default=False, action="store_true")
    parser.add_argument('--query_is_language',
                        default=False,
                        action="store_true")
    parser.add_argument('--vocab_embed_size', default=128, type=int)
    # model architecture
    parser.add_argument('--num_step', default=3, type=int)
    parser.add_argument('--num_layer', default=1, type=int)
    parser.add_argument('--rnn_state_size', default=128, type=int)
    parser.add_argument('--query_embed_size', default=128, type=int)
    # optimization
    parser.add_argument('--batch_size', default=64, type=int)
    parser.add_argument('--print_per_batch', default=3, type=int)
    parser.add_argument('--max_epoch', default=10, type=int)
    parser.add_argument('--min_epoch', default=5, type=int)
    parser.add_argument('--learning_rate', default=0.001, type=float)
    parser.add_argument('--no_norm', default=False, action="store_true")
    parser.add_argument('--thr', default=1e-20, type=float)
    parser.add_argument('--dropout', default=0., type=float)
    # evaluation
    parser.add_argument('--get_phead', default=False, action="store_true")
    parser.add_argument('--adv_rank', default=False, action="store_true")
    parser.add_argument('--rand_break', default=False, action="store_true")
    parser.add_argument('--accuracy', default=False, action="store_true")
    parser.add_argument('--top_k', default=10, type=int)

    d = vars(parser.parse_args())
    option = Option(d)
    if option.exp_name is None:
        option.tag = time.strftime("%y-%m-%d-%H-%M")
    else:
        option.tag = option.exp_name
    if option.resplit:
        assert not option.no_extra_facts
    if option.accuracy:
        assert option.top_k == 1

    os.environ["CUDA_VISIBLE_DEVICES"] = option.gpu
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

    if not option.query_is_language:
        data = Data(option.datadir, option.seed, option.type_check,
                    option.domain_size, option.no_extra_facts)
    else:
        data = DataPlus(option.datadir, option.seed)
    print("Data prepared.")

    option.num_entity = data.num_entity
    option.num_operator = data.num_operator
    if not option.query_is_language:
        option.num_query = data.num_query
    else:
        option.num_vocab = data.num_vocab
        option.num_word = data.num_word  # the number of words in each query

    option.this_expsdir = os.path.join(option.exps_dir, option.tag)
    if not os.path.exists(option.this_expsdir):
        os.makedirs(option.this_expsdir)
    option.ckpt_dir = os.path.join(option.this_expsdir, "ckpt")
    if not os.path.exists(option.ckpt_dir):
        os.makedirs(option.ckpt_dir)
    option.model_path = os.path.join(option.ckpt_dir, "model")

    option.save()
    print("Option saved.")

    ## build the model
    learner = Learner(option)
    print("Model built.")

    saver = tf.train.Saver()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = False
    config.log_device_placement = False
    config.allow_soft_placement = True
    with tf.Session(config=config) as sess:
        tf.set_random_seed(option.seed)
        sess.run(tf.global_variables_initializer())
        print("Session initialized.")

        if option.from_model_ckpt is not None:
            saver.restore(sess, option.from_model_ckpt)
            print("Checkpoint restored from model %s" % option.from_model_ckpt)

        data.reset(option.batch_size)
        experiment = Experiment(sess, saver, option, learner, data)
        print("Experiment created.")

        if not option.no_train:
            print("Start training...")
            experiment.train()

        if not option.no_preds:
            print("Start getting test predictions...")
            experiment.get_predictions()

        if not option.no_rules:
            print("Start getting rules...")
            experiment.get_rules()

        if option.get_vocab_embed:
            print("Start getting vocabulary embedding...")
            experiment.get_vocab_embedding()

    experiment.close_log_file()
    print("=" * 36 + "Finish" + "=" * 36)
Example #19
def getIncomeTable(strid):

    data = Data()
    #------- Income statement
    length_incomeTable = len(data.dates['incomeStatement'])
    incomeTable = data.get('營業收入合計', length_incomeTable)[strid].to_frame()
    incomeTable = incomeTable.reset_index()
    incomeTable['date'] = incomeTable['date'].apply(modifMonthtoSeason)
    copyIncomeTable = incomeTable.copy()
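    # The Q4 value in the source data is the cumulative full-year figure, so the
    # loops below subtract the three preceding quarters from every Q04 row to
    # recover the single-quarter revenue.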


    # For financial-holding companies and similar industries, use net revenue (淨收益) instead
    rawData = data.get('營業收入合計', length_incomeTable)
    allnanlist = rawData.columns[rawData.isna().any()]
    if strid in allnanlist:
        incomeTable = data.get('淨收益', length_incomeTable)[strid].to_frame()
        if math.isnan(incomeTable[strid][0]):  # some industries report it as 收入合計 (total revenue) instead; 6005 and 6024 still to be handled
            incomeTable = data.get('收入合計', length_incomeTable)[strid].to_frame()
        incomeTable = incomeTable.reset_index()
        incomeTable['date'] = incomeTable['date'].apply(modifMonthtoSeason)
        copyIncomeTable = incomeTable.copy()
        counter1 = 0
        for i in incomeTable['date']:
            if i.find('Q04') != -1:  # it is the fourth quarter
                for j in range(counter1-1,counter1-3-1,-1):
                    incomeTable.at[counter1, strid] = incomeTable.at[counter1, strid] - copyIncomeTable.at[j, strid]
                counter1+=1    
            else:
                counter1+=1
    else:
        # check whether it is the fourth quarter
        counter = 0
        for i in incomeTable['date']:
            if i.find('Q04') != -1:  # it is the fourth quarter
                for j in range(counter-1,counter-3-1,-1):
                    incomeTable.at[counter, strid] = incomeTable.at[counter, strid] - copyIncomeTable.at[j, strid]
                counter+=1    
            else:
                counter+=1
    
    incomeTable.set_index('date', inplace=True)

    #--------- Pre-tax net income
    length_preTaxIncome = len(data.dates['incomeStatement'])
    preTaxIncome = data.get('繼續營業單位稅前淨利(淨損)', length_preTaxIncome)[strid].to_frame()
    preTaxIncome = preTaxIncome.reset_index()
    preTaxIncome['date'] = preTaxIncome['date'].apply(modifMonthtoSeason)
    copyPreTax = preTaxIncome.copy()

    counter_preTax = 0
    for i in preTaxIncome['date']:
        if counter_preTax%4 != 0:  # not the first quarter
            preTaxIncome.at[counter_preTax, strid] = preTaxIncome.at[counter_preTax, strid] - copyPreTax.at[counter_preTax-1, strid]
            counter_preTax+=1
        else:
            counter_preTax+=1
    
    preTaxIncome.set_index('date', inplace=True)

    #--------- End pre-tax net income

    incomeStatementfig = [{
        'x' : incomeTable.index,
        'y' : incomeTable[strid],
        'type':'line',
        'name':'營業收入',
        'hovertemplate': "%{x}營業收入= %{y:$,}"
    },{
        'x' : preTaxIncome.index,
        'y' : preTaxIncome[strid],
        'type':'line',
        'name':'稅前淨利',
        'hovertemplate': "%{x}稅前淨利= %{y:$,}"
    }]
    #------- End income statement

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=(incomeStatementfig[0])['x'], y=(incomeStatementfig[0])['y'], name='營業收入', hovertemplate='%{x}營業收入= %{y:$,}'))
    fig.add_trace(go.Scatter(x=(incomeStatementfig[1])['x'], y=(incomeStatementfig[1])['y'], name='稅前淨利', hovertemplate="%{x}稅前淨利= %{y:$,}"))
    fig.update_layout(
        plot_bgcolor = '#36404A',
        paper_bgcolor = '#36404A',
        font_color = '#7FDBFF'
    )

    return fig
Example #20

def compute_accuracy(data, predictions):
    ground_truth = np.array([ex['label']
                             for ex in data])  # array of gold labels from the dataset
    predictions = np.array(predictions)
    assert len(ground_truth) == len(predictions)

    return np.sum(np.equal(ground_truth, predictions)) / float(
        len(ground_truth))


if __name__ == '__main__':

    data = {
        'train': Data('data/train2017.csv').get_train_data(),
        'valid_2016': Data('data/valid2016.csv').get_validtest_data(),
        'valid_2018': Data('data/valid2018.csv').get_validtest_data(),
        'test': Data('data/test2016.csv').get_validtest_data()
    }

    embedded_data = dict()
    train_context_embs, train_ending_embs = np.random.rand(
        5000, 768), np.random.rand(5000, 768)
    embedded_data['train'] = {
        'context': train_context_embs,
        'ending': train_ending_embs
    }
    valid_2016_context_embs, valid_2016_ending_0_embs, valid_2016_ending_1_embs = np.random.rand(
        1871, 768), np.random.rand(1871, 768), np.random.rand(1871, 768)
    valid_2018_context_embs, valid_2018_ending_0_embs, valid_2018_ending_1_embs = np.random.rand(
Example #21
def main_worker(rank, world_size, args):
    args.gpu = args.gpus[rank]
    if rank == 0:
        writer = SummaryWriter(osp.join('exp', args.exp))
    print(f'==> Rank={rank}, Use GPU: {args.gpu} for training.')
    dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, world_size=world_size, rank=rank)

    torch.cuda.set_device(args.gpu)

    model = ModelDSR(
        object_num=args.object_num,
        transform_type=args.transform_type,
        motion_type='se3' if args.model_type != '3dflow' else 'conv',
    )

    model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.95))

    if args.resume is not None:
        checkpoint = torch.load(args.resume, map_location=torch.device(f'cuda:{args.gpu}'))
        model.load_state_dict(checkpoint['state_dict'])
        print(f'==> rank={rank}, loaded checkpoint {args.resume}')

    data, samplers, loaders = {}, {}, {}
    for split in ['train', 'test']:
        data[split] = Data(data_path=args.data_path, split=split, seq_len=args.seq_len)
        samplers[split] = torch.utils.data.distributed.DistributedSampler(data[split])
        loaders[split] = DataLoader(
            dataset=data[split],
            batch_size=args.batch,
            num_workers=args.workers,
            sampler=samplers[split],
            pin_memory=False
        )
    print('==> dataset loaded: [size] = {0} + {1}'.format(len(data['train']), len(data['test'])))

    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])

    for epoch in range(args.epoch):
        samplers['train'].set_epoch(epoch)
        lr = adjust_learning_rate(optimizer, epoch, args)
        if rank == 0:
            print(f'==> epoch = {epoch}, lr = {lr}')

        with torch.enable_grad():
            loss_tensor_train = iterate(loaders['train'], model, optimizer, rank, args)
        with torch.no_grad():
            loss_tensor_test = iterate(loaders['test'], model, None, rank, args)

        # tensorboard log
        loss_tensor = torch.stack([loss_tensor_train, loss_tensor_test]).cuda()
        torch.distributed.all_reduce(loss_tensor)
        if rank == 0:
            training_step = (epoch + 1) * len(data['train'])
            loss_tensor = loss_tensor.cpu().numpy()
            for i, split in enumerate(['train', 'test']):
                for j, loss_type in enumerate(args.loss_types):
                    for step_id in range(args.seq_len):
                        writer.add_scalar(
                            '%s-loss_%s/%d' % (split, loss_type, step_id),
                            loss_tensor[i, j, step_id] / len(data[split]), epoch+1)
            writer.add_scalar('learning_rate', lr, epoch + 1)

        if rank == 0 and (epoch + 1) % args.snapshot_freq == 0:
            visualize(loaders, model, epoch, args)
            save_state = {
                'state_dict': model.module.state_dict(),
            }
            torch.save(save_state, osp.join(args.model_dir, 'latest.pth'))
            shutil.copyfile(
                osp.join(args.model_dir, 'latest.pth'),
                osp.join(args.model_dir, 'epoch_%d.pth' % (epoch + 1))
            )
    def __init__(self, transform=None):
        self.transform = transform
        self.data = Data()
        self.init_batch()
# set arguments
l_r = args.l_r
batch_size = args.batch_size
pickle_dir = args.pickle_dir
max_seq = args.max_seq
epochs = args.epochs
is_reuse = args.is_reuse
load_path = args.load_path
save_path = args.save_path
multi_gpu = args.multi_gpu
num_layer = args.num_layers


# load data
#dataset = Data('dataset/processed')
dataset = Data(pickle_dir)
print(dataset)


# load model
learning_rate = callback.CustomSchedule(par.embedding_dim) if l_r is None else l_r
opt = Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)


# define model
mt = MusicTransformerDecoder(
            embedding_dim=256,
            vocab_size=par.vocab_size,
            num_layer=num_layer,
            max_seq=max_seq,
            dropout=0.2,
def main():
    """
    This is the main function that ties all other components together:
    """

    # Read the cert data
    cert_data = "\n".join(
        list(c.wincerts.get_pems()) +
        list(c.wincerts.get_pems_wincertstore())).encode()

    # Write the cert data to a temporary file
    handle = tempfile.NamedTemporaryFile(delete=False)
    handle.write(cert_data)
    handle.flush()

    # Set the temporary file name to an environment variable for the requests package
    os.environ['REQUESTS_CA_BUNDLE'] = handle.name

    logging.basicConfig(
        format=
        '%(asctime)s %(name)-12s %(levelname)-8s %(filename)s %(funcName)s %(message)s',
        datefmt='%m-%d %H:%M:%S',
        level=logging.INFO,
        filename="amp_health_checker_log.log")
    logging.warning("AMP Health Checker logging level is %s",
                    logging.getLevelName(logging.getLogger().level))
    logging.debug("%s: Starting Health Checker", time.ctime())

    try:
        settings_manager = SettingsManager()
        settings_manager.load_from_disk()
    except json.decoder.JSONDecodeError as e:
        errmsg = '%s: line %d column %d (char %d)' % (e.msg, e.lineno, e.colno,
                                                      e.pos)
        sg.Popup(
            f"Configration file is not valid JSON. Cannot proceed.\n{errmsg}",
            title="AMP not found")
        logging.critical(
            "Configration file is not valid JSON. Cannot proceed.")
        exit(1)
    except SchemaError as e:
        sg.Popup(
            f"Configuration file contains bad Schema. Cannot Proceed.\n{e.code}",
            title="AMP not found")
        logging.critical(
            f"Configuration file contains bad Schema. Cannot Proceed. {e.code}"
        )
        exit(1)
    except Exception as e:
        sg.Popup(f"Unknown Error. Cannot Proceed.\n{str(e)}",
                 title="AMP not found")
        logging.critical(f"Unknown Error. Cannot Proceed. {str(e)}")
        exit(1)

    x_count = 0

    button_size = (20, 1)
    layout = [
        [
            sg.Text("AMP Version: ",
                    tooltip="The current AMP version running on the system."),
            sg.Text("Loading...", key='_version')
        ],
        [
            sg.Text(
                "CPU Usage: ",
                tooltip="The current amount of CPU utilized by AMP executables."
            ),
            sg.Text("0", key='_cpu', size=(5, 1))
        ],
        [
            sg.Text("AMP Uptime: ", size=(10, 1)),
            sg.Text("",
                    size=(27, 1),
                    key="_uptime",
                    tooltip="Time since AMP was last stopped")
        ],
        [
            sg.Text(
                "Isolation: ",
                tooltip="Shows if the connector is Isolated or Not Isolated. "
                "Refresh with Refresh button."),
            sg.Text("", size=(12, 1), key="_isolated"),
            sg.Text(
                "",
                tooltip=
                "If Isolated, shows the unlock code. Requires valid API Credentials .",
                size=(17, 1),
                key="_unlock_code")
        ], [sg.Text('_' * 50)],
        [
            sg.Text("TETRA Version: ", size=(11, 1)),
            sg.Text("",
                    size=(8, 1),
                    key="_tetra_version",
                    tooltip="Shows the local TETRA version.\n"
                    "Green if up to date.\n"
                    "Yellow if not within last 5 or connectivity error "
                    "to API.\nRed if TETRA is not enabled."),
            sg.Button(
                'Check TETRA Version',
                size=button_size,
                button_color=default_button_color,
                key='_tetra_version_button',
                tooltip=
                "Checks the API to see if TETRA is up to date. Requires Valid API Credentials."
            ),
            sg.Text("", key="_latest_tetra_version", size=(8, 1))
        ],
        [
            sg.Text("Policy Serial: ", size=(11, 1)),
            sg.Text(
                "",
                size=(8, 1),
                key="_policy_version",
                tooltip="Shows the current policy serial number.\n"
                "Green if this matches the cloud version.\n"
                "Gray if there is a connectivity issue or invalid API Credentials.\n"
                "Red if the local policy doesn't match the cloud version.  Try syncing policy."
            ),
            sg.Button(
                "Check Policy Version",
                size=button_size,
                button_color=default_button_color,
                key='_policy_version_button',
                tooltip="Checks the API to see if the policy is up to date."),
            sg.Text("", key="_latest_policy_version", size=(8, 1))
        ],
        [
            sg.Text(
                "API Credentials: ",
                size=(13,
                      1),
                tooltip='Shows if the currently stored API '
                'Credentials are valid. Can read from text file named "apiCreds.txt" in the local directory.\n'
                'Must be in this format:\n'
                'client_id="abcdabcdabcdabcdabcd"\n'
                'api_key="abcd1234-abcd-1234-abcd-abcd1234abcd"'),
            sg.Text("", size=(6, 1), key="_api_cred_valid"),
            sg.Button("Add API Credentials",
                      button_color=default_button_color,
                      size=button_size,
                      key="-API-CREDS-",
                      tooltip="Allows user to manually input API Credentials.")
        ], [sg.Text('_' * 50)],
        [
            sg.Button(
                "Live Debugging",
                button_color=default_button_color,
                size=button_size,
                tooltip=
                "Live analysis used for determining potential exclusions."),
            sg.Button(
                "Run Analysis",
                button_color=default_button_color,
                size=button_size,
                tooltip=
                "Runs analysis on the sfc.exe.log file to provide information on potential exclusions."
            )
        ],
        [
            sg.Button(
                "Live Top Processes",
                button_color=default_button_color,
                size=button_size,
                tooltip=
                "Shows the top processes seen on the system in a live view."),
            sg.Button(
                "Top IPs",
                button_color=default_button_color,
                size=button_size,
                tooltip=
                "Shows the top IP addresses seen on the system in a live view."
            )
        ],
        [
            sg.Button(
                "Connectivity Test",
                button_color=default_button_color,
                size=button_size,
                key="_connectivity_test",
                tooltip=
                "Test connection to the required servers for AMP operations."),
            sg.Button(
                "Check Engines",
                button_color=default_button_color,
                size=button_size,
                tooltip=
                "Provides a quick view of which AMP engines are enabled on the system."
            )
        ],
        [
            sg.Button(
                "View Exclusions",
                button_color=default_button_color,
                size=button_size,
                tooltip=
                "Shows the file and process exclusions from the local policy."
            ),
            sg.Button(
                "Manual SFC Analysis",
                button_color=default_button_color,
                size=button_size,
                tooltip=
                "Allows importing external sfc.exe.log files for analysis.")
        ],
        [
            sg.Button(
                "Generate Diagnostic",
                button_color=default_button_color,
                size=button_size,
                tooltip=
                "Generate AMP diagnostic bundle with AMP Health Checker log. Both files "
                "will be on the desktop."),
            sg.Button(
                "Settings",
                button_color=default_button_color,
                size=button_size,
                tooltip=
                "Add settings view/file 4.1 credentials for api 4.2 api endpoint "
                "4.3 update endpoint 4.4 endpoint list")
        ],
        [
            sg.Text(
                'Log Level: ',
                tooltip=
                "Select higher log level if requested by the tool developers."
            ),
            sg.Button('INFO', button_color=active_button_color, key='_INFO'),
            sg.Button('WARNING',
                      button_color=default_button_color,
                      key="_WARNING"),
            sg.Button('DEBUG', button_color=default_button_color, key="_DEBUG")
        ], [sg.Text('', size=(8, 1))],
        [
            sg.Text('', size=(13, 1)),
            sg.Button(
                "Refresh",
                size=(7, 1),
                button_color=default_button_color,
                tooltip="Refreshes calculated data, including Isolation Status."
            ),
            sg.Button("Cancel",
                      button_color=default_button_color,
                      tooltip="Exits the program.")
        ]
    ]
    logging.debug('test')
    window = sg.Window("AMP Health Check",
                       layout,
                       size=(480, 540),
                       margins=(60, 10))

    is_first = True
    d_instance = Data(settings_manager)
    while True:
        if is_first:
            event, values = window.Read(timeout=0)
            logging.debug('Event - %s : Values - %s', event, values)
            is_first = False
        else:
            event, values = window.Read(timeout=5000)

        if x_count < 10:
            x_count += 1
        else:
            if d_instance.api_cred_valid:
                d_instance.update_api_calls(settings_manager)
            x_count = 0
        d_instance.update(settings_manager)
        logging.debug('Self Scan Count = %s', d_instance.internal_health_check)
        window.FindElement('_version').Update(d_instance.version)
        window.FindElement('_cpu').Update(d_instance.current_cpu)
        window.FindElement('_uptime').Update(d_instance.converted_uptime)
        window.FindElement('_tetra_version').Update(
            d_instance.tetra_version_display)
        window.FindElement('_policy_version').Update(
            d_instance.policy_dict['policy_sn'])
        window.FindElement('_api_cred_valid').Update('Valid' if d_instance.api_cred_valid \
                                                         else 'Invalid')
        window.FindElement('_isolated').Update(d_instance.isolated)
        window.FindElement('_unlock_code').Update(d_instance.unlock_code)
        if event in (None, "Cancel"):
            break
        elif event == "_INFO":
            logging.getLogger().setLevel(logging.INFO)
            logging.info('Log level changed to %s',
                         logging.getLevelName(logging.getLogger().level))
            window.FindElement('_INFO').Update(
                button_color=active_button_color)
            window.FindElement('_WARNING').Update(
                button_color=default_button_color)
            window.FindElement('_DEBUG').Update(
                button_color=default_button_color)
            window.Refresh()
        elif event == '_WARNING':
            logging.getLogger().setLevel(logging.WARNING)
            logging.warning('Log level changed to %s',
                            logging.getLevelName(logging.getLogger().level))
            window.FindElement('_INFO').Update(
                button_color=default_button_color)
            window.FindElement('_WARNING').Update(
                button_color=active_button_color)
            window.FindElement('_DEBUG').Update(
                button_color=default_button_color)
            d_instance.verify_api_creds(settings_manager)
            window.Refresh()
        elif event == '_DEBUG':
            logging.getLogger().setLevel(logging.DEBUG)
            logging.debug('Log level changed to %s',
                          logging.getLevelName(logging.getLogger().level))
            window.FindElement('_INFO').Update(
                button_color=default_button_color)
            window.FindElement('_WARNING').Update(
                button_color=default_button_color)
            window.FindElement('_DEBUG').Update(
                button_color=active_button_color)
            d_instance.verify_api_creds(settings_manager)
            window.Refresh()
        elif event == "Live Debugging":
            popups.lpap(d_instance, settings_manager)
        elif event == "Live Top Processes":
            popups.just_process(d_instance, settings_manager)
        elif event == "_tetra_version_button":
            popups.check_latest_tetra(d_instance, window, settings_manager)
        elif event == "_policy_version_button":
            popups.check_latest_policy(d_instance, window, settings_manager)
        elif event == "_connectivity_test":
            popups.connectivity(d_instance, settings_manager)
        elif event == "Check Engines":
            popups.engines_enabled(d_instance)
        elif event == "View Exclusions":
            popups.view_exclusions(d_instance)
        elif event == "Run Analysis":
            popups.analysis(d_instance, settings_manager)
        elif event == "Top IPs":
            popups.topips(d_instance, settings_manager)
        elif event == "Refresh":
            d_instance.reset_data(settings_manager)
            window.Refresh()
        elif event == "-API-CREDS-":
            popups.get_api_credentials(d_instance, settings_manager)
        elif event == "Manual SFC Analysis":
            popups.manual_sfc(d_instance, settings_manager)
        elif event == "Generate Diagnostic":
            d_instance.generate_diagnostic()
            if any(d_instance.diag_failed):
                popups.diag_failed_popup(d_instance.diag_failed)
        elif event == "Settings":
            popups.settings(settings_manager)
    if d_instance.enabled_debug:
        d_instance.disable_debug()
    # update
    window.close()
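
# The window above follows PySimpleGUI's polling pattern: Read() with a
# timeout returns an (event, values) pair every few seconds, keyed elements
# are refreshed on every pass, and a None/"Cancel" event ends the loop. A
# minimal sketch of that pattern, assuming PySimpleGUI is installed (the
# layout and keys below are illustrative, not the health checker's own):
import PySimpleGUI as sg

def poll_loop_sketch():
    layout = [[sg.Text("CPU: "), sg.Text("0", key="_cpu")],
              [sg.Button("Cancel")]]
    window = sg.Window("Sketch", layout)
    while True:
        event, values = window.Read(timeout=5000)  # wake up every 5 seconds
        if event in (None, "Cancel"):               # window closed or Cancel pressed
            break
        window.FindElement("_cpu").Update("42")     # refresh a keyed element
    window.close()
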
def setVal(self, i, val):
    self.data[i] = Data(val)
class GenMember(object):
    """
    Class used to create valid mathematical expressions, get the fitness of each individual in the
    population, select two parents, and update the population once the children are ready to be added to the
    new population.

    """

    # Read the data from the text file
    d = Data('dataset2.txt')
    read_data = d.read_data(shuffle_d=False)
    data = read_data[0]
    labels = read_data[1]

    # The functional set of operators - consider expanding this.
    operations = ['+', '-', '*', '/']

    def generate_expression(self, max_depth=4):
        """
        Function to generate a valid mathematical expression. An expression consists of values from the functional
        set -> ['+', '-', '*', '/'] and values from a terminal set -> [random number between 0-50, X1,...,X5] where
        X1,...,X5 are Altman's KPI ratios.
        :param max_depth: maximum depth of the regression tree.
        :return: valid expression <= maximum depth of tree.
        """

        # print out either a random number between 0 and 50, or a variable X1-X5.
        if max_depth == 1:
            terminals = [random() * 50, "X1", "X2", 'X3', "X4",
                         "X5"]  # random() * 50,
            return self.__str__(choice(terminals))

        # include bracketing 20% of the time.
        rand = random()
        if rand <= 0.2:
            return '(' + self.generate_expression(max_depth - 1) + choice(
                self.operations) + self.generate_expression(max_depth -
                                                            1) + ')'
        else:
            return self.generate_expression(max_depth - 1) + choice(
                self.operations) + self.generate_expression(max_depth - 1)

    def __str__(self, num):
        """
        cast terminal value to a string.
        :param num: the value to be parsed as a string.
        :return: value parsed as a string
        """
        return str(num)

    def get_valid_expressions(self, max_depth, population_size):
        """
        Function to ensure that each initial member of the population contains at least the variables X1,...,X5.
        :param max_depth: maximum depth of the tree.
        :param population_size: generate a user defined population size.
        :return: every individual in population as a list of strings.
        """
        expression_list = list()
        while len(expression_list) < population_size:
            # generate the expressions and cast them to strings.
            init = GenMember()
            exps = init.generate_expression(max_depth)
            str_exps = str(exps)
            expression_list.append(str_exps)
            # keep only expressions which contain all of the variables X1-X5.
            expression_list = [
                item for item in expression_list
                if all(var in item for var in ('X1', 'X2', 'X3', 'X4', 'X5'))
            ]
        return expression_list

    def get_fitness(self, expressions, child=False):
        """
        Function to get the fitness of the population. Fitness function based on Number of Hits method.
        :param expressions: list of expressions being passed in. If not first iteration, then expression comes in
        as a single expression string and is converted to a list containing the child expression to be evaluated.
        :param child: if child is false, then assume first iteration -> get fitness of whole population. If child is
        true, then only get fitness of new children values, not total population.
        :return: list of number-of-misses fitness values, one per expression (lower is better).
        """
        if child is True:
            exp_list = list()
            exp_list.append(expressions)
            expression = exp_list

        else:
            expression = expressions
        # get all the rows of the data being passed in to get the fitness.
        row = np.asarray(GenMember.data, dtype=object)

        # transpose the data to get all the X1 values in a list and repeat for X2,...,X5
        new_row = row.T
        # get the labels of the company data.
        labels = GenMember.labels

        # store the data in the variables to make evaluation of expression easier.
        X1 = new_row[0]  # length = len of data set
        X2 = new_row[1]
        X3 = new_row[2]
        X4 = new_row[3]
        X5 = new_row[4]
        predictions = list()

        for ex in expression:
            tmp = list()
            try:
                # evaluate the expression
                x = eval(ex)
                # if evaluation does not contain any variables from the terminal set
                if isinstance(x, (int, float)):
                    # constant expression: predict the same value for every row
                    tmp = [x] * len(X1)
                    predictions.append(tmp)
                else:
                    # if the value is greater than or equal to 0, append 1, else append 0
                    for j in x:
                        if j >= 0:
                            tmp.append(1)
                        else:
                            tmp.append(0)
                    predictions.append(tmp)
            # if expression contains "/0" throw ZeroDivisionError and give individual a poor fitness.
            except ZeroDivisionError:
                # print("cannot divide by 0!!!")
                for k in range(len(X1)):
                    tmp = [9999] * len(X1)
                predictions.append(tmp)

        # get number of hits fitness.
        noh = list()
        for k in range(len(predictions)):
            tmp = list()
            for j in range(len(predictions[k])):
                tmp.append(labels[j] == predictions[k][j])
            noh.append(tmp)
        fitness = [len(j) - sum(j) for j in noh]
        return fitness

    def tournament_selection(self, population, fitness, selection_size):
        """
        Function to select the parents of the population using tournament selection. Select n individuals from the
        population at random, and select the best two individuals from the selection to be the parents.
        :param population: the population generated - the list of expressions
        :param fitness: the population fitnesses
        :param selection_size: the number of individuals to compete against each other
        :return: the two parents that will be used to create offspring, as a list of (expression, fitness) tuples
        """
        zipped_population = list(zip(population, fitness))
        # print("zipped population: ", zipped_population)

        # select potential candidate solutions to be assessed.
        candidates = sample(zipped_population, selection_size)
        # print("candidates:",candidates)

        # select the first parent with the best fitness out of the candidates
        parent_one = min(candidates, key=lambda t: t[1])
        # print(parent_one)
        p1_index = zipped_population.index(parent_one)
        # print(p1_index)
        # remove parent for now to prevent parent being selected twice
        zipped_population.pop(p1_index)
        # print("new popilation:", zipped_population)

        candidates = sample(zipped_population, selection_size)
        # select another sample and get the second parent
        parent_two = min(candidates, key=lambda t: t[1])
        p2_index = zipped_population.index(parent_two)
        zipped_population.pop(p2_index)

        # return the parents as a list of (expression, fitness) tuples.
        parents = list()
        parents.append(parent_one)
        parents.append(parent_two)
        return parents

    def select_best_parents(self, population, fitness):
        """
        Function to select the best two parents in the current population to be put forward for
        crossover and mutation.
        :param population: the current population
        :param fitness: fitness of the current population
        :return: the two parents.
        """
        zipped_population = list(zip(population, fitness))
        parent_one = min(zipped_population, key=lambda t: t[1])
        p1_index = zipped_population.index(parent_one)

        zipped_population.pop(p1_index)
        parent_two = min(zipped_population, key=lambda t: t[1])
        p2_index = zipped_population.index(parent_two)
        zipped_population.pop(p2_index)

        parents = list()
        parents.append(parent_one)
        parents.append(parent_two)

        return parents

    def update_population(self, population, fitness, c1, child_fit1, c2,
                          child_fit2):
        """
        Function to update the population, by comparing the two worst individuals in the current population,
        with the two new children produced. Insert the children into the population if they have a better fitness
        relative to the two worst in the population to improve the population fitness.
        :param population: the current population
        :param fitness: fitness of each individual in the current population
        :param c1: first child produced
        :param child_fit1: first child produced fitness
        :param c2: second child produced
        :param child_fit2: second child produced fitness
        :return: the new updated population with the new population fitnesses.
        """
        # print("current population")
        # print(population)
        # print("fitenss: ")
        # print(fitness)
        child1 = list()
        child2 = list()

        child1.append(c1)
        child2.append(c2)

        zipped_population = list(zip(population, fitness))
        # print("zipped popn",zipped_population)
        child2 = list(zip(child2, child_fit2))
        # print("child2: ", child2)

        # # print("worst candidate 1: ")
        worst_one = max(zipped_population, key=lambda t: t[1])
        w1_index = zipped_population.index(worst_one)
        # print("worst one: ", worst_one)
        # if the child fitness is better than the worst in the population, replace them with first child
        if child_fit1[0] <= worst_one[1]:
            zipped_population.pop(w1_index)
            zipped_population.append((c1, child_fit1[0]))

        # if the second child's fitness is better than the worst in the population, replace them with the second child
        worst_two = max(zipped_population, key=lambda t: t[1])
        w2_index = zipped_population.index(worst_two)
        # print("worst2: ", worst_two)

        if child_fit2[0] <= worst_two[1]:
            zipped_population.pop(w2_index)
            zipped_population.append((c2, child_fit2[0]))

        # print("zipped population: ", zipped_population)
        new_population = [i[0] for i in zipped_population]
        new_population_fitness = [i[1] for i in zipped_population]

        return new_population, new_population_fitness
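
# A rough usage sketch for the GenMember class above: build an initial
# population, score it, then repeatedly pick parents by tournament selection
# and fold the offspring back in. It assumes dataset2.txt is on disk (the
# class reads it at definition time); crossover and mutation are not part of
# the snippet, so make_children() here is purely hypothetical.
gp = GenMember()
population = gp.get_valid_expressions(max_depth=4, population_size=20)
fitness = gp.get_fitness(population)

for generation in range(100):
    parent_one, parent_two = gp.tournament_selection(population, fitness,
                                                     selection_size=5)
    # hypothetical crossover/mutation step, defined elsewhere in the project
    c1, c2 = make_children(parent_one[0], parent_two[0])
    fit1 = gp.get_fitness(c1, child=True)
    fit2 = gp.get_fitness(c2, child=True)
    population, fitness = gp.update_population(population, fitness,
                                               c1, fit1, c2, fit2)
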
Example #27
0
def parse_parser_results(text):
    """ This is the nasty bit of code to interact with the command-line
    interface of the CoreNLP tools.  Takes a string of the parser results
    and then returns a Python list of dictionaries, one for each parsed
    sentence.
    """

    data = Data()

    state = STATE_START
    #for line in re.split("\r\n(?![^\[]*\])",text):
    for line in re.split("\r\n", text):
        line = line.strip()

        if line == 'NLP>':
            break
        if line.startswith("Sentence #"):
            state = STATE_TEXT

        elif state == STATE_TEXT:
            Data.newSen()
            data.addText(line)
            state = STATE_WORDS

        elif state == STATE_WORDS:
            if len(line) == 0:
                continue
            if not line.startswith("[Text="):
                raise Exception('Parse error. Could not find "[Text=" in: %s' %
                                line)
            for s in WORD_PATTERN.findall(line):
                t = parse_bracketed(s)
                if t[0] == '': continue
                data.addToken(t[0], t[1][u'CharacterOffsetBegin'],
                              t[1][u'CharacterOffsetEnd'], t[1][u'Lemma'],
                              t[1][u'PartOfSpeech'], t[1][u'NamedEntityTag'])
            state = STATE_TREE
            parsed = []

        elif state == STATE_TREE:
            if len(line) == 0:
                state = STATE_DEPENDENCY
                parsed = " ".join(parsed)
                #data.addTree(Tree.parse(parsed))
            else:
                parsed.append(line)

        elif state == STATE_DEPENDENCY:
            if len(line) == 0:
                state = STATE_COREFERENCE
            else:
                pass
                '''
                # don't need here
                split_entry = re.split("\(|, ", line[:-1])
                if len(split_entry) == 3:
                    rel, l_lemma, r_lemma = split_entry
                    m = re.match(r'(?P<lemma>.+)-(?P<index>[^-]+)', l_lemma)
                    l_lemma, l_index = m.group('lemma'), m.group('index')
                    m = re.match(r'(?P<lemma>.+)-(?P<index>[^-]+)', r_lemma)
                    r_lemma, r_index = m.group('lemma'), m.group('index')

                    data.addDependency( rel, l_lemma, r_lemma, l_index, r_index)
                '''
        elif state == STATE_COREFERENCE:
            if "Coreference set" in line:
                #if 'coref' not in results:
                #    results['coref'] = []
                coref_set = []
                data.addCoref(coref_set)
            else:
                for src_i, src_pos, src_l, src_r, sink_i, sink_pos, sink_l, sink_r, src_word, sink_word in CR_PATTERN.findall(
                        line):
                    src_i, src_pos, src_l, src_r = int(src_i), int(
                        src_pos), int(src_l), int(src_r)
                    sink_i, sink_pos, sink_l, sink_r = int(sink_i), int(
                        sink_pos), int(sink_l), int(sink_r)
                    coref_set.append(
                        ((src_word, src_i, src_pos, src_l, src_r),
                         (sink_word, sink_i, sink_pos, sink_l, sink_r)))

    return data
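
# parse_bracketed() and WORD_PATTERN are defined elsewhere in the original
# module. Based on the keys the loop above reads (Text, CharacterOffsetBegin,
# CharacterOffsetEnd, Lemma, PartOfSpeech, NamedEntityTag), a simplified
# sketch of what they might look like follows -- an assumption for
# illustration, not the original implementation:
import re

WORD_PATTERN = re.compile(r'\[([^\]]+)\]')  # grabs each "[Text=... ...]" chunk

def parse_bracketed(s):
    """Turn 'Text=word Key=value ...' into a (word, {key: value}) pair."""
    word = ''
    attrs = {}
    for pair in s.split(' '):  # naive split on spaces; enough for a sketch
        if '=' not in pair:
            continue
        key, value = pair.split('=', 1)
        if key == 'Text':
            word = value
        else:
            attrs[key] = value
    return word, attrs
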
Example #28
0
from data import Data

test = Data("unsupervised", preprocess=False)
test.save_csv_separate("test\\", "_test")
Example #29
0
        for i in range(10):
            img_path = gallery_path[index[i]]
            print(img_path)

            ax = plt.subplot(1, 11, i + 2)
            ax.axis('off')
            plt.imshow(plt.imread(img_path))
            ax.set_title(img_path.split('/')[-1][:9])

        fig.savefig("show.png")
        print('result saved to show.png')


if __name__ == '__main__':
    data = Data()
    model = MGN()
    loss = Loss()
    main = Main(model, loss, data)

    if opt.mode == 'train':

        for epoch in range(1, opt.epoch + 1):
            print('\nepoch', epoch)
            main.train()
            if epoch % 50 == 0:
                print('\nstart evaluate')
                main.evaluate(epoch)
                os.makedirs('weights', exist_ok=True)
                torch.save(model.state_dict(),
                           ('weights/model_{}.pt'.format(epoch)))
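
# The loop above writes a plain state_dict to weights/model_<epoch>.pt every
# 50 epochs. A minimal sketch of restoring one of those files for evaluation
# (the epoch number is illustrative; MGN comes from the snippet itself):
import torch

model = MGN()
model.load_state_dict(torch.load('weights/model_50.pt', map_location='cpu'))
model.eval()  # switch to inference mode before evaluating
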
# Quick'n'dirty SSL certificate generation:
#
# openssl genrsa -out webhook_pkey.pem 2048
# openssl req -new -x509 -days 3650 -key webhook_pkey.pem -out webhook_cert.pem
#
# When asked for "Common Name (e.g. server FQDN or YOUR name)" you should reply
# with the same value you put in WEBHOOK_HOST

WEBHOOK_URL_BASE = "https://%s:%s" % (WEBHOOK_HOST, WEBHOOK_PORT)
WEBHOOK_URL_PATH = "/%s/" % (API_TOKEN)

logger.setLevel(logging.INFO)

bot = TeleBot(API_TOKEN)
data = Data(bot=bot)
system = System(data=data)

client = Client(data=data)
main_menu = Main(data=data)
order = Order(data=data, client=client)
channel = Channel(data=data, client=client)
redaction = Redaction(data=data, order=order)
account = Account(data=data)

app = flask.Flask(__name__)


# Empty webserver index, return nothing, just http 200
@app.route('/', methods=['GET', 'HEAD'])
def index():