def error_analyze(make_model, train_data, test_data, featurizer):
    matrices = Features.make_experiment_matrices(train_data, test_data, featurizer)
    model = make_model()
    model.fit(matrices['train_X'], matrices['train_Y'])
    bins = [v / 100.0 for v in range(50, 110, 5)]
    ext_preds = Models.extended_predict(model, matrices['test_X'], matrices['test_Y'])
    return Models.error_analysis(ext_preds, bins = bins)
Example #2
def main():
    if len(sys.argv) < 4:
        print('not enough arguments')
        sys.exit()
    else:
        purpose = sys.argv[1]
        model_name = sys.argv[2]
        # use sample data by default
        data_file = config.data_dir + "/" + sys.argv[3]

        if purpose == "train":
            model = Models.get_instance(model_name)
            train(data_file, model, model_name)
        elif purpose == "feature_selection":
            model = Models.get_instance(model_name)
            feature_selection(data_file, model)
        else:
            if len(sys.argv) < 5:
                print('not enough arguments')
                sys.exit()
            else:
                weight_file = sys.argv[4]
                model = joblib.load(weight_file)
                df = predict(data_file, model)
                df.to_csv(config.data_dir+"/prediction_result_" + get_time_str() + ".csv")
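# Implied command-line usage (the script name and the "predict" purpose below are
# illustrative placeholders, not taken from the original; data_file is resolved
# relative to config.data_dir):
#   python script.py train <model_name> <data_file>
#   python script.py feature_selection <model_name> <data_file>
#   python script.py predict <model_name> <data_file> <weight_file>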
def log_results(out_file, msg, tw_cv, blog_cv, featurizer):
    if blog_cv:
        print(msg + " blog(80%) -> blog(80%) CV-10")
        results_b = Models.model_cv(Models.LogisticRegression, blog_cv, featurizer, n_folds = 10)
        write_cv(out_file, msg + " b", results_b)
    if tw_cv and blog_cv:
        print(msg + " twitter+wiki -> blog(80%)")
        results_twb = experiment_maxent(tw_cv, blog_cv, featurizer)
        write_detailed(out_file, msg + " twb", results_twb)
    if tw_cv:
        print(msg + " twitter+wiki -> twitter+wiki CV-5")
        results_tw = Models.model_cv(Models.LogisticRegression, tw_cv, featurizer, n_folds = 5)
        write_cv(out_file, msg + " tw", results_tw)
    def post(self):
        self.response.out.write("added <br />")
        user = users.get_current_user()
        
        # See if the user chose to upload a payslip or another file
        if self.request.POST['type'] == "payslip":

            self.response.out.write("payslip <br />")
            
            payslip = Models.Payslip(parent=Models.payslip_key(user.user_id()))
             
            #Set the model attributes 
            payslip.ownerId = user.user_id()
            payslip.beginning = datetime.datetime.strptime(self.request.POST['beginning'],'%Y-%m-%d').date()
            payslip.ending = datetime.datetime.strptime(self.request.POST['ending'],'%Y-%m-%d').date()
            payslip.income = float(self.request.POST['income'])
            payslip.tax = float(self.request.POST['tax'])
            payslip.net = float(self.request.POST['income']) - float(self.request.POST['tax'])
            payslip.company = self.request.POST['company']
            
            #Output the given form to the confirmation page
            self.response.out.write(payslip.ownerId + "<br/>" )
            self.response.out.write(str(payslip.upload_date) + "<br/>")
            self.response.out.write(str(payslip.beginning) + "<br/>")
            self.response.out.write(str(payslip.ending) + "<br/>")
            self.response.out.write(str(payslip.income) + "<br/>")
            self.response.out.write(str(payslip.tax) + "<br/>")
            self.response.out.write(str(payslip.net) + "<br/>")
            self.response.out.write(str(payslip.company) + "<br/>")
            
            #add the model to the data store
            payslip.put()                         
             
        else:
            self.response.out.write("other <br />")
            
            file = Models.File(parent=Models.file_key(user.user_id()))
            
            #Set the model attributes 
            file.ownerId = user.user_id()
            file.title = self.request.POST['title']
            file.description = self.request.POST['description']
            
            #Output the given form to the confirmation page
            self.response.out.write( file.ownerId + "<br/>" )
            self.response.out.write(file.title + "<br/>")
            self.response.out.write(file.description + "<br/>")
            
            #add the model to the data store
            file.put()
Example #5
def generate_files_html(self):
    files = db.GqlQuery("SELECT * "
                "FROM File "
                "WHERE ANCESTOR IS :1 ",
                Models.file_key(users.get_current_user().user_id()))

    
    html = ""
    for file in files:
        if file.file_key:
            html += """
                <tr>
                    <td>"""+str(file.upload_date)+"""</td>
                    <td>"""+file.title+"""</td>
                    <td>"""+file.description+"""</td>
                    <td><a href="/view_file/"""+str(file.file_key.key())+"""">View</a></td>
                </tr>
            """
        else:
            html += """
                <tr>
                    <td>"""+str(file.upload_date)+"""</td>
                    <td>"""+file.title+"""</td>
                    <td>"""+file.description+"""</td>
                    <td>View</td>
                </tr>
            """
    return html
def literature_comp(out_file):
    # compare performance vs the way it was done in literature
    # Blog 10-fold CV
    blog_results_me = Models.model_cv(Models.LogisticRegression, blog, feat4, stratified = True)
    blog_results_svm = Models.model_cv(Models.LinearSVC, blog, feat4, stratified = True)
    blog_results_svm_tfidf = Models.model_cv(Models.LinearSVC, blog, feat2, stratified = True)

    write_cv(out_file, "blog stratified ME feature 4 cv-10", blog_results_me)
    write_cv(out_file, "blog stratified SVM feature 4 cv-10", blog_results_svm)
    write_cv(out_file, "blog stratified SVM tf-idf cv-10", blog_results_svm_tfidf)

    # twitter trained on solely twitter and evaluated on positive/negative classification
    t_training_granular = to_utf8(prepareTwitterDataWithPNLabel(Globals.TWITTER_TRAIN, splitwords = False))
    t_test_granular = to_utf8(prepareTwitterDataWithPNLabel(Globals.TWITTER_TEST, splitwords = False))
    t_test_no_neutral = filter(lambda x: x[1] != '2', t_test_granular)
    # twitter granular, ignore neutral
    t_maxent = experiment_maxent(t_training_granular, t_test_no_neutral, feat4)
    # just bigram counts...cannot get the performance report in the presentation
    t_extra = experiment_maxent(t_training_granular, t_test_no_neutral, extra_features)
    write_detailed(out_file, "twitter ME feature 4 test results", t_maxent)
    write_detailed(out_file, "twitter ME extra (uni+bi+valence+punct) test results", t_extra)
Example #7
    def __init__(self, options):
        self.do = options['data_options']
        self.mo = options['model_options']
        self.oo = options['optimization_options']
        self.lo = options['log_options']

        data_path = self.do['data_path']
        task_num = self.do['task_number']
        lang = self.do.get('language', 'en')  # defaults to use small Eng set
        self.qa_train, self.qa_test \
            = read_dataset(data_path,
                           task_num, lang, options['data_options']['reader'],
                           {'threshold': 0,
                            'context_length': self.mo['context_length'],
                            'context_length_percentage': self.mo.get('context_length_percentage', 1),
                            'sentence_length': self.mo['sentence_length']})

        self.data_size = len(self.qa_train.stories)
        self.mo['context_length'] = self.qa_train.context_length
        #self.options['model_options']['context_length'] = self.qa_train.context_length

        tokens = self.qa_train.specialWords
        self.NULL = tokens['<NULL>']
        self.EOS = tokens['<EOS>']
        self.UNKNOWN = tokens['<UNKNOWN>']

        if self.oo['dump_params']:
            weight_dir = Path(self.oo['weight_path'])
            if not weight_dir.exists():
                weight_dir.mkdir()
        self.batch_size_train = self.oo['batch_size_train']
        self.batch_size_test = self.oo['batch_size_test']

        self.verbose = self.oo['verbose']
        self.log = self.logger_factory()
        self.lo['dump_epoch'] = self.oo['max_epoch'] \
                                if self.lo['dump_epoch'] < 0 \
                                else self.lo['dump_epoch']

        vocab_size = len(self.qa_train.index_to_word)
        options['model_options']['vocab_size'] = vocab_size
        model_name = self.mo['model_name']
        self.model = Models.model(model_name)(options)
        self.log("context length: %d" % self.mo['context_length'])
'''This is finding predicted SADs for the world bird populations.
Well it was a nice try but I ran out of memory.'''

import numpy as np
import matplotlib.pyplot as plt

import Models
import HeatMap
import AverageShape

SADModels = ['SimBrokenStick', 'SimLogNormInt', 'SimpleRandomFraction',
             'SimParetoInt']
N = 40000
S = 100
sample_size = 100
fig = plt.figure()

for i, model in enumerate(SADModels):

    fig.add_subplot(2, 2, i + 1)

    if model == 'SimBrokenStick':
        prdSADs = Models.SimBrokenStick(N, S, sample_size)
        HeatMap.RACHeatMap(fig, prdSADs)
        plt.plot(np.log(AverageShape.AvgShape(prdSADs)), color='lime', label='Predicted', lw=2)
        print('BS')

    elif model == 'SimLogNormInt':
        prdSADs = Models.SimLogNormInt(N, S, sample_size)
        HeatMap.RACHeatMap(fig, prdSADs)
        plt.plot(np.log(AverageShape.AvgShape(prdSADs)), color='lime', label='Predicted', lw=2)
        print('SLN')

    elif model == 'SimpleRandomFraction':
        prdSADs = Models.SimpleRandomFraction(N, S, sample_size)
        HeatMap.RACHeatMap(fig, prdSADs)
        plt.plot(np.log(AverageShape.AvgShape(prdSADs)), color='lime', label='Predicted', lw=2)
        print('RandFrac')
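The branches above differ only in which Models function they call; a minimal, hedged refactor (assuming every name listed in SADModels, including SimParetoInt, is an attribute of Models with the same (N, S, sample_size) signature, which the original only shows for the first three) could collapse them:

for i, name in enumerate(SADModels):
    fig.add_subplot(2, 2, i + 1)
    # look the simulation function up by name on the Models module
    prdSADs = getattr(Models, name)(N, S, sample_size)
    HeatMap.RACHeatMap(fig, prdSADs)
    plt.plot(np.log(AverageShape.AvgShape(prdSADs)),
             color='lime', label='Predicted', lw=2)
    print(name)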
Example #9
def NeuralNetwork(encode_type="LabelEncode"):
    df = Base_Process(encode_type)
    # add a geographic-location clustering feature
    df = pd.merge(df, _F_Clsuter_Geo(), on=pri_id, how='left')

    # add the user's top-N most-active province / city / district
    temp = _F_GeoCode(n=1)
    df = pd.merge(df, temp, on=pri_id, how='left')

    # add distinct-count (nunique) statistics
    temp = _F_nunique(3)
    df = pd.merge(df, temp, on=pri_id, how='left')

    # add nunique-ratio statistics
    temp = _F_nunique_ratio(3)
    df = pd.merge(df, temp, on=pri_id, how='left')

    _Train = pd.merge(_train, df, on=pri_id, how='left').fillna(0)
    _Test = pd.merge(_test, df, on=pri_id, how='left').fillna(0)
    features = [col for col in _Train.columns if col != pri_id and col != 'y']

    _Label = _Train['y']

    # build the input data and the network structure
    from keras.models import Sequential
    model = Sequential()
    from keras.layers import Dense, Activation, Conv2D, MaxPooling2D, Flatten, Dropout
    from keras import backend as K
    import tensorflow as tf
    import itertools

    shape = _Train.shape
    # Convolution layer
    # model.add(Conv2D(64, (3,3), activation='relu', input_shape = (shape[0],shape[1],1)))
    # # Pooling layer
    # model.add(MaxPooling2D(pool_size=(2,2)))
    # # Fully connected layer (sets the output dimension)
    # model.add(Dense(256, activation='relu'))
    # # Dropout layer
    # model.add(Dropout(0.5))
    # # Final fully connected layer, outputs a probability
    # model.add(Dense(1, activation='sigmoid'))

    # MLP
    # print(shape)
    model.add(Dense(64, input_dim=402, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    # Compile (to be replaced later with a custom evaluation metric)

    # The competition's scoring metric
    def tpr_weight_funtion(y_true, y_pred):

        # batch_size, n_elems = y_pred.shape[0],y_pred.shape[1]
        # idxs = list(itertools.permutations(range(n_elems)))
        # permutations = tf.gather(y_pred, idxs, axis=-1)  # Shape=(batch_size, n_permutations, n_elems)

        d = pd.DataFrame()
        # sess = tf.Session()
        # sess.run(tf.global_variables_initializer())
        # d['prob'] = permutations.eval(session=sess)

        d['prob'] = list(K.eval(y_pred))
        d['y'] = list(y_true)
        d = d.sort_values(['prob'], ascending=[0])
        y = d.y
        PosAll = pd.Series(y).value_counts()[1]
        NegAll = pd.Series(y).value_counts()[0]
        pCumsum = d['y'].cumsum()
        nCumsum = np.arange(len(y)) - pCumsum + 1
        pCumsumPer = pCumsum / PosAll
        nCumsumPer = nCumsum / NegAll
        TR1 = pCumsumPer[abs(nCumsumPer - 0.001).idxmin()]
        TR2 = pCumsumPer[abs(nCumsumPer - 0.005).idxmin()]
        TR3 = pCumsumPer[abs(nCumsumPer - 0.01).idxmin()]
        return 0.4 * TR1 + 0.3 * TR2 + 0.3 * TR3

    def AUC(y_true, y_pred):
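        # Note: with hard 0/1 predictions this evaluates (TPR + TNR) / 2,
        # i.e. balanced accuracy at a single threshold, not a full ROC AUC.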
        not_y_pred = np.logical_not(y_pred)
        y_int1 = y_true * y_pred
        y_int0 = np.logical_not(y_true) * not_y_pred
        TP = np.sum(y_pred * y_int1)
        FP = np.sum(y_pred) - TP
        TN = np.sum(not_y_pred * y_int0)
        FN = np.sum(not_y_pred) - TN
        TPR = np.float(TP) / (TP + FN)
        FPR = np.float(FP) / (FP + TN)
        return ((1 + TPR - FPR) / 2)

    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    # model.compile(optimizer='rmsprop',loss='binary_crossentropy',metrics=[AUC])
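    # A hedged alternative (assuming tf.keras is available in this environment):
    # Keras's built-in streaming AUC metric works on symbolic tensors and avoids
    # the K.eval() call used in tpr_weight_funtion above, e.g.
    # model.compile(optimizer='rmsprop',
    #               loss='binary_crossentropy',
    #               metrics=[tf.keras.metrics.AUC()])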

    # Training (batch_size = number of samples used per iteration)

    res = pd.DataFrame()
    res[pri_id] = _Test[pri_id]
    _K_Train = pd.DataFrame()
    _KTrain = pd.DataFrame()
    _KTrain[pri_id] = _Train[pri_id]
    # the inputs need to be normalized
    _Train, _Test = _M.Normalize(_Train[features], _Test[features])

    from sklearn.model_selection import StratifiedKFold
    # split _Train into 5 folds, then average over the 5 folds
    skf = StratifiedKFold(n_splits=5)
    pred = np.zeros((_Test.shape[0], 1))

    for train, test in skf.split(_Train, _Label):

        model.fit(_Train.iloc[train],
                  _Label.iloc[train],
                  epochs=50,
                  batch_size=128)
        # concatenate the held-out fold's predictions with the test predictions
        temp = model.predict(_Test)
        pred += np.asarray(temp)
        _K_T = pd.DataFrame()
        _K_T[pri_id] = _KTrain.iloc[test][pri_id]
        _K_T['mlp'] = model.predict(_Train.iloc[test])
        _K_Train = pd.concat((_K_Train, _K_T))

    pred /= 5
    # output of the fully-connected network
    res['mlp'] = pred
    res = pd.concat((_K_Train, res))
    res.to_csv(data_path + "data/_F_mlp_features.csv", index=False)
'''
Load the dataset
'''
train_dataset, test_dataset = Data_Reader.Mnist.Mnist_dataset().get_dataset()
loader_train = Data_Reader.get_dataloader(dataset=train_dataset,
                                          batch_size=param['batch_size'])
loader_test = Data_Reader.get_dataloader(dataset=test_dataset,
                                         batch_size=param['test_batch_size'])
'''
Build the model
The model is already defined in model.py; here we simply instantiate it
'''

modelpath = './train4_AdvT_InputZero.pkl'
net = Models.Lenet5.Lenet5()  # load the model
net = Models.load_state_dict(net, modelpath)
base.enable_cuda(net)  # use CUDA
num_correct, num_samples, acc = Optimizer.test(net, loader_test)  # check the initial accuracy
print('[Start] right predict:(%d/%d) ,pre test_acc=%.4f%%' %
      (num_correct, num_samples, acc))
'''
Train the model
'''
net.train()  # training mode
criterion = nn.CrossEntropyLoss()  # loss function
optimizer = torch.optim.RMSprop(
    net.parameters(),
    lr=param['learning_rate'],
    weight_decay=param['weight_decay'])  # optimizer: learning rate, weight decay (regularization), etc.

adversary = Adversary.LinfPGD.LinfPGDAttack(net, param['epsilon'])
Example #11
import Models
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in newer scikit-learn
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

# step 1 - load all the DB records into a dataframe
# step 2 - drop the fields that are not useful
# step 3 - fill empty fields with the mean value and fill missing descriptions with the title
# step 4 - build a term-document matrix from the long descriptions
# step 5 - join the matrix with the other features
# step 6 - create the training and test sets
# step 7 - train the predictive models

books = Models.GetAllBooks()
books_df = pd.DataFrame(books)
books_df.set_index('bid', inplace=True)

drop_columns = ['thumbnailUrl', 'isbn', '_id']
books_df.drop(drop_columns, inplace=True, axis=1)

books_df['longDescription'].fillna(books_df['title'], inplace=True)
books_df['pageCount'].fillna(books_df['pageCount'].median(), inplace=True)
books_df['categories'].fillna('none', inplace=True)
books_df['cat'] = books_df['categories'].map(
    lambda x: x[0]
    if len(x) > 0 else 'default')  # from the list, take only the first value
books_df['aut'] = books_df['authors'].map(
    lambda x: x[0]
    if len(x) > 0 else 'default')  # from the list, take only the first value
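The snippet stops after step 3; a minimal, hedged sketch of steps 4-7 using the imports at the top (assumptions: scipy is available for sparse hstack, and the target being predicted is the derived 'cat' column, which the original never states):

from scipy.sparse import hstack

# step 4 - term-document matrix from the long descriptions
vectorizer = CountVectorizer(stop_words='english')
X_text = vectorizer.fit_transform(books_df['longDescription'])

# step 5 - join the matrix with another numeric feature
X = hstack([X_text, books_df[['pageCount']].astype(float).values]).tocsr()

# step 6 - create the training and test sets (an 80/20 split is assumed)
X_train, X_test, y_train, y_test = train_test_split(
    X, books_df['cat'], test_size=0.2, random_state=42)

# step 7 - train a predictive model and evaluate it
nb = MultinomialNB()
nb.fit(X_train, y_train)
print(metrics.accuracy_score(y_test, nb.predict(X_test)))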
                     (X_train.shape[0], X_train.shape[1], stock.shape[1]))

lbls_train = lbls_train.iloc[:-n_past]

# 1 hidden layer network with input: n_past x num_features, hidden 120x5, output 2x1
template = [[n_past, stock.shape[1]], [120, 5], [2, 1]]

# get Bilinear model
projection_regularizer = None
projection_constraint = keras.constraints.max_norm(3.0, axis=0)
attention_regularizer = None
attention_constraint = keras.constraints.max_norm(5.0, axis=1)
dropout = 0.1

model = Models.TABL(template, dropout, projection_regularizer,
                    projection_constraint, attention_regularizer,
                    attention_constraint)
model.summary()

# create class weight
class_weight = {0: 1e6 / 300.0, 1: 1e6 / 400.0, 2: 1e6 / 300.0}

# training          # remove .iloc[2:] for single day
model.fit(X_train,
          lbls_train.iloc[2:],
          batch_size=256,
          epochs=100,
          class_weight=class_weight)

model.save('model.h5')
Example #13
def searchresults():
    words = []
    info = []

    if request.method == 'POST':

        search_word = request.form['search']
        value = request.form['options']

        if 'username' in session:
            message = "you are logged in"
            currentuser = session['username']
            logState = True
            new_search = Models.Search(search_word, currentuser)
            Models.db.session.add(new_search)
            Models.db.session.commit()

            # WIKI API
            if value == 'wiki':
                # For Wikipedia
                info = []
                words = Results.getWikipediaList(search_word)
                if not words:
                    error = "There are no matches. Search again"
                    return render_template("results.html",
                                           results=info,
                                           checked=value,
                                           searched_word=search_word,
                                           logState=logState,
                                           words=error)
                else:
                    for w in words:
                        info.append(Results.getWikiInfo(w))
                    return render_template("results.html",
                                           results=info,
                                           checked=value,
                                           searched_word=search_word,
                                           logState=logState)

            # STARWARS API
            elif value == 'sw':
                try:
                    info = Results.getStarWarsList(search_word)
                    test = info[0]
                    if test == 'NA':
                        error = "There are no results for that search. Please try searching again"
                        return render_template("swerror.html",
                                               error=error,
                                               checked=value,
                                               searched_word=search_word,
                                               logState=logState)
                    else:
                        return render_template("starwars.html",
                                               person=info,
                                               error="",
                                               checked=value,
                                               searched_word=search_word,
                                               logState=logState)
                except:
                    return "too many requests using the StarWarsAPI! Try using something else."

            # IMAGE API
            elif value == 'pic':
                pictures = Results.getPicture(search_word)
                return render_template("picture.html",
                                       pictures=pictures,
                                       checked=value,
                                       searched_word=search_word,
                                       logState=logState)

            # Twitter API
            elif value == 'twit':
                tweets = TwitterAPI.TwitterAPI()
                tweet_list = tweets.getTweets(search_word)
                if not tweet_list:
                    return render_template("twitterError.html")
                else:
                    return render_template("twitter.html",
                                           checked=value,
                                           searched_word=search_word,
                                           tweetlist=tweet_list,
                                           logState=logState)
            # YouTUBE API
            elif value == 'youtube':
                videos = YoutubeAPI.youtube_search(search_word)
                if not videos:
                    uerror = "No videos found. Try searching something else"
                    return render_template("youtube.html",
                                           uerror=uerror,
                                           checked=value,
                                           searched_word=search_word,
                                           logState=logState)
                else:
                    return render_template("youtube.html",
                                           videos=videos,
                                           checked=value,
                                           searched_word=search_word,
                                           logState=logState)

            # ALL APIS
            elif value == 'all':
                list = []
                words = Results.getWikipediaList(search_word)
                for w in words:
                    list.append(Results.getWikiInfo(w))

                pictures = Results.getPicture(search_word)
                picture = pictures[0]
                # picture = ["one","two", "three"]

                info = Results.getStarWarsList(search_word)
                test = info[0]

                tweets = TwitterAPI.TwitterAPI()
                tweet_list = tweets.getTweets(search_word)

                videos = YoutubeAPI.youtube_search(search_word)

                #check if the StarWars API result is empty
                #if it is:
                if test == 'NA':
                    error = "STARWARS API: Nothing found! Try searching again."
                    #check if wikipedia is empty
                    #if wiki is empty
                    if not words:
                        wikierror = "There are no matches. Search again"

                        #if it is, check if twitter is empty
                        if not tweet_list:
                            terror = "Sorry there are no tweets. Try searching again"

                            if not videos:
                                uerror = "No videos found. Try searching something else"
                                return render_template(
                                    "allresults.html",
                                    error=error,
                                    person="",
                                    results=list,
                                    picture=picture,
                                    checked=value,
                                    searched_word=search_word,
                                    logState=logState,
                                    wikierror=wikierror,
                                    terror=terror,
                                    uerror=uerror)
                            else:
                                return render_template(
                                    "allresults.html",
                                    error=error,
                                    person="",
                                    results=list,
                                    picture=picture,
                                    checked=value,
                                    searched_word=search_word,
                                    logState=logState,
                                    wikierror=wikierror,
                                    terror=terror,
                                    videos=videos)

                        else:
                            if not videos:
                                uerror = "No videos found. Try searching something else"
                                return render_template(
                                    "allresults.html",
                                    person=info,
                                    error="",
                                    results=list,
                                    picture=picture,
                                    checked=value,
                                    searched_word=search_word,
                                    logState=logState,
                                    tweetlist=tweet_list[:3],
                                    uerror=uerror)
                            else:
                                return render_template(
                                    "allresults.html",
                                    person=info,
                                    error="",
                                    results=list,
                                    picture=picture,
                                    checked=value,
                                    searched_word=search_word,
                                    logState=logState,
                                    tweetlist=tweet_list[:3],
                                    videos=videos)
                        #if wiki isn't empty
                    else:
                        if not tweet_list:
                            terror = "Sorry there are no tweets. Try searching again"
                            if not videos:
                                uerror = "No videos found. Try searching something else"
                                return render_template(
                                    "allresults.html",
                                    error=error,
                                    person="",
                                    results=list,
                                    picture=picture,
                                    checked=value,
                                    searched_word=search_word,
                                    logState=logState,
                                    terror=terror,
                                    uerror=uerror)
                            else:
                                return render_template(
                                    "allresults.html",
                                    error=error,
                                    person="",
                                    results=list,
                                    picture=picture,
                                    checked=value,
                                    searched_word=search_word,
                                    logState=logState,
                                    terror=terror,
                                    videos=videos)
                        else:
                            if not videos:
                                uerror = "No videos found. Try searching something else"

                                return render_template(
                                    "allresults.html",
                                    error=error,
                                    person="",
                                    results=list,
                                    picture=picture,
                                    checked=value,
                                    searched_word=search_word,
                                    logState=logState,
                                    tweetlist=tweet_list[:3],
                                    uerror=uerror)
                            else:
                                return render_template(
                                    "allresults.html",
                                    error=error,
                                    person="",
                                    results=list,
                                    picture=picture,
                                    checked=value,
                                    searched_word=search_word,
                                    logState=logState,
                                    tweetlist=tweet_list[:3],
                                    videos=videos)

                #if starwars is not empty
                else:
                    #check wikipedia
                    if not words:
                        wikierror = "There are no matches. Search again"

                        if not tweet_list:
                            terror = "Sorry there are no tweets. Try searching again"
                            if not videos:
                                uerror = "No videos found. Try searching something else"

                                return render_template(
                                    "allresults.html",
                                    person=info,
                                    error="",
                                    results=list,
                                    picture=picture,
                                    checked=value,
                                    searched_word=search_word,
                                    logState=logState,
                                    wikierror=wikierror,
                                    terror=terror,
                                    uerror=uerror)
                            else:
                                return render_template(
                                    "allresults.html",
                                    person=info,
                                    error="",
                                    results=list,
                                    picture=picture,
                                    checked=value,
                                    searched_word=search_word,
                                    logState=logState,
                                    wikierror=wikierror,
                                    terror=terror,
                                    videos=videos)
                        else:
                            if not videos:
                                uerror = "No videos found. Try searching something else"
                                return render_template(
                                    "allresults.html",
                                    person=info,
                                    error="",
                                    results=list,
                                    picture=picture,
                                    checked=value,
                                    searched_word=search_word,
                                    logState=logState,
                                    tweetlist=tweet_list[:3],
                                    uerror=uerror)
                            else:
                                return render_template(
                                    "allresults.html",
                                    person=info,
                                    error="",
                                    results=list,
                                    picture=picture,
                                    checked=value,
                                    searched_word=search_word,
                                    logState=logState,
                                    tweetlist=tweet_list[:3],
                                    videos=videos)

                    else:
                        if not tweet_list:
                            terror = "Sorry there are no tweets. Try searching again"
                            if not videos:
                                uerror = "No videos found. Try searching something else"
                                return render_template(
                                    "allresults.html",
                                    person=info,
                                    error="",
                                    results=list,
                                    picture=picture,
                                    checked=value,
                                    searched_word=search_word,
                                    logState=logState,
                                    terror=terror,
                                    uerror=uerror)
                            else:

                                return render_template(
                                    "allresults.html",
                                    person=info,
                                    error="",
                                    results=list,
                                    picture=picture,
                                    checked=value,
                                    searched_word=search_word,
                                    logState=logState,
                                    terror=terror,
                                    videos=videos)
                        else:
                            if not videos:
                                uerror = "No videos found. Try searching something else"

                                return render_template(
                                    "allresults.html",
                                    person=info,
                                    error="",
                                    results=list,
                                    picture=picture,
                                    checked=value,
                                    searched_word=search_word,
                                    logState=logState,
                                    tweetlist=tweet_list[:3],
                                    uerror=uerror)
                            else:
                                return render_template(
                                    "allresults.html",
                                    person=info,
                                    error="",
                                    results=list,
                                    picture=picture,
                                    checked=value,
                                    searched_word=search_word,
                                    logState=logState,
                                    tweetlist=tweet_list[:3],
                                    videos=videos)

        #REGULAR USER
        else:
            logState = False
            message = "you are not logged in"

            # WIKI API
            if value == 'wiki':
                # For Wikipedia
                info = []
                words = Results.getWikipediaList(search_word)
                if not words:
                    error = "There are no matches. Search again"
                    return render_template("results.html",
                                           results=info,
                                           checked=value,
                                           searched_word=search_word,
                                           logState=logState,
                                           words=error)
                else:
                    for w in words:
                        info.append(Results.getWikiInfo(w))
                    return render_template("results.html",
                                           results=info,
                                           checked=value,
                                           searched_word=search_word,
                                           logState=logState)

            #STARWARS API
            elif value == 'sw':
                try:
                    info = Results.getStarWarsList(search_word)
                    test = info[0]
                    if test == 'NA':
                        error = "There are no results for that search. Please try searching again"
                        return render_template("swerror.html",
                                               error=error,
                                               checked=value,
                                               searched_word=search_word)
                    else:
                        return render_template("starwars.html",
                                               person=info,
                                               error="",
                                               checked=value,
                                               searched_word=search_word,
                                               logState=logState)
                except:
                    return "too many requests using the StarWarsAPI! Try using something else."

            # IMAGE API
            elif value == 'pic':
                pictures = Results.getPicture(search_word)
                return render_template("picture.html",
                                       pictures=pictures,
                                       checked=value,
                                       searched_word=search_word,
                                       logState=logState)

            #ALL APIS
            elif value == 'all':
                list = []
                words = Results.getWikipediaList(search_word)
                for w in words:
                    list.append(Results.getWikiInfo(w))

                pictures = Results.getPicture(search_word)
                picture = pictures[0]
                # picture = ["one","two", "three"]

                info = Results.getStarWarsList(search_word)
                test = info[0]
                if test == 'NA':
                    error = "STARWARS API: Nothing found! Try searching again."
                    if not words:
                        wikierror = "There are no matches. Search again"
                        return render_template("allresults.html",
                                               error=error,
                                               person="",
                                               results=list,
                                               picture=picture,
                                               checked=value,
                                               searched_word=search_word,
                                               logState=logState,
                                               wikierror=wikierror)
                    else:
                        return render_template("allresults.html",
                                               error=error,
                                               person="",
                                               results=list,
                                               picture=picture,
                                               checked=value,
                                               searched_word=search_word,
                                               logState=logState)
                else:
                    if not words:
                        wikierror = "There are no matches. Search again"
                        return render_template("allresults.html",
                                               person=info,
                                               error="",
                                               results=list,
                                               picture=picture,
                                               checked=value,
                                               searched_word=search_word,
                                               logState=logState,
                                               wikierror=wikierror)
                    else:
                        return render_template("allresults.html",
                                               person=info,
                                               error="",
                                               results=list,
                                               picture=picture,
                                               checked=value,
                                               searched_word=search_word,
                                               logState=logState)
Example #14
#!/usr/bin/env python

from settings         import *
from algo_settings    import *
from CustomModels     import *
from Models           import *

if __name__ == '__main__':
  if validationBool :
    models = Models(model_list)
    models.train()
    models.validation()
    models.dump()
    if multipleSubmissions :
      models.writeSubmissions()

  if submitBool :
    model.train()
    model.validate()
    print(model.trainPath)
    print(model.submissionPath)
    model.writeSubmission()


  print('Hello World Juju and Ulysse')
Example #15
from PIL import ImageOps
import matplotlib.pyplot as plt
import time

import argparse
import Datasets
import Models

# Training settings
parser = argparse.ArgumentParser(
    description='KITTI Depth Completion Task TEST')
parser.add_argument('--dataset',
                    type=str,
                    default='kitti',
                    choices=Datasets.allowed_datasets(),
                    help='dataset to work with')
parser.add_argument('--mod',
                    type=str,
                    default='mod',
                    choices=Models.allowed_models(),
                    help='Model for use')
parser.add_argument('--no_cuda', action='store_true', help='no gpu usage')
parser.add_argument('--input_type',
                    type=str,
                    default='rgb',
                    help='use rgb for rgbdepth')
# Data augmentation settings
parser.add_argument('--crop_w',
                    type=int,
                    default=1216,
                    help='width of image after cropping')
parser.add_argument('--crop_h',
                    type=int,
                    default=256,
                    help='height of image after cropping')
Example #16
def main():
    global args
    global dataset
    args = parser.parse_args()

    torch.backends.cudnn.benchmark = args.cudnn

    best_file_name = glob.glob(os.path.join(args.save_path, 'model_best*'))[0]

    save_root = os.path.join(os.path.dirname(best_file_name), 'results')
    if not os.path.isdir(save_root):
        os.makedirs(save_root)

    print("==========\nArgs:{}\n==========".format(args))
    # INIT
    print("Init model: '{}'".format(args.mod))
    args.channels_in = 1 if args.input_type == 'depth' else 4
    model = Models.define_model(args.mod, args)
    print("Number of parameters in model {} is {:.3f}M".format(
        args.mod.upper(),
        sum(tensor.numel() for tensor in model.parameters()) / 1e6))
    if not args.no_cuda:
        # Load on gpu before passing params to optimizer
        if not args.multi:
            model = model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()
    if os.path.isfile(best_file_name):
        print("=> loading checkpoint '{}'".format(best_file_name))
        checkpoint = torch.load(best_file_name)
        model.load_state_dict(checkpoint['state_dict'])
        lowest_loss = checkpoint['loss']
        best_epoch = checkpoint['best epoch']
        print(
            'Lowest RMSE for selection validation set was {:.4f} in epoch {}'.
            format(lowest_loss, best_epoch))
    else:
        print("=> no checkpoint found at '{}'".format(best_file_name))
        return

    if not args.no_cuda:
        model = model.cuda()
    print("Initializing dataset {}".format(args.dataset))
    dataset = Datasets.define_dataset(args.dataset, args.data_path,
                                      args.input_type)
    dataset.prepare_dataset()
    to_pil = transforms.ToPILImage()
    to_tensor = transforms.ToTensor()
    norm = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])
    depth_norm = transforms.Normalize(mean=[14.97 / args.max_depth],
                                      std=[11.15 / args.max_depth])
    model.eval()
    print("===> Start testing")
    total_time = []
    if args.num_samples != 0:
        random_sampler = Random_Sampler(args.num_samples)
    with torch.no_grad():
        for i, (img, rgb, gt) in tqdm.tqdm(
                enumerate(
                    zip(dataset.selected_paths['lidar_in'],
                        dataset.selected_paths['img'],
                        dataset.selected_paths['gt']))):

            raw_path = os.path.join(img)
            raw_pil = Image.open(raw_path)
            gt_path = os.path.join(gt)
            gt_pil = Image.open(gt)
            assert raw_pil.size == (1216, 352)

            crop = 352 - args.crop_h
            raw_pil_crop = raw_pil.crop((0, crop, 1216, 352))
            gt_pil_crop = gt_pil.crop((0, crop, 1216, 352))

            raw = depth_read(raw_pil_crop, args.sparse_val)
            if args.num_samples != 0:
                raw = random_sampler.sample(raw)
            raw = to_tensor(raw).float()
            gt = depth_read(gt_pil_crop, args.sparse_val)
            gt = to_tensor(gt).float()
            valid_mask = (raw > 0).detach().float()

            input = torch.unsqueeze(raw, 0).cuda()
            gt = torch.unsqueeze(gt, 0).cuda()

            # if args.normal:
            # input = input/args.max_depth
            # input = depth_norm(input)

            if args.input_type == 'rgb':
                rgb_path = os.path.join(rgb)
                rgb_pil = Image.open(rgb_path)
                assert rgb_pil.size == (1216, 352)
                rgb_pil_crop = rgb_pil.crop((0, crop, 1216, 352))
                rgb = to_tensor(rgb_pil_crop).float()
                if not args.normal:
                    rgb = rgb * 255.0
                else:
                    rgb = norm(rgb)
                rgb = torch.unsqueeze(rgb, 0).cuda()

                input = torch.cat((input, rgb), 1)

            torch.cuda.synchronize()
            a = time.perf_counter()
            output, hidden = model(input, hidden=(None, None))
            if 'mod' in args.mod or 'stacked' in args.mod:
                output = output[0]

            torch.cuda.synchronize()
            b = time.perf_counter()
            total_time.append(b - a)
            # if args.normal:
            # output = output*args.max_depth
            output = torch.clamp(output, min=0, max=85)

            output = output * 256.
            raw = raw * 256.
            output = output[0][0:1].cpu()
            data = output[0].numpy()

            if crop != 0:
                padding = (0, 0, crop, 0)
                output = torch.nn.functional.pad(output, padding, "constant",
                                                 0)
                output[:, 0:crop] = output[:, crop].repeat(crop, 1)

            pil_img = to_pil(output.int())
            assert pil_img.size == (1216, 352)
            pil_img.save(os.path.join(save_root, os.path.basename(img)))
    print('average_time: ', sum(total_time[100:]) / (len(total_time[100:])))
    print('num imgs: ', i + 1)
Example #17
    test_data = util.SICKData('test')
    #test_loader = DataLoader(train_data, batch_size = args.batch_size, shuffle=True, num_workers=1)
    test_loader = DataLoader(test_data,
                             batch_size=args.batch_size,
                             shuffle=True,
                             drop_last=True)
    embedding_model = util.readEmbedding()  ##

    # Model
    print('==> Building model..')

    if args.model == 'biRNN':
        net = Models.biRNN(embedding_model,
                           batch_size=args.batch_size,
                           hidden_size=args.hidden_size,
                           embedding_dim=300,
                           dropout=args.dropout_lstm)
    #TODO: add more models
    net.to(device)

    optimizer = optim.Adam(net.parameters(), lr=args.lr)
    CEloss = nn.BCELoss()

    start_epoch = 0  # start from epoch 0 or last checkpoint epoch
    # Load checkpoint.
    if args.checkpoint:
        print('==> Resuming from checkpoint..')
        print(args.checkpoint)
        checkpoint = torch.load(args.checkpoint)
        net.load_state_dict(checkpoint['net_state_dicts'])
Example #18
def train(lr=0.01,
          gpu=0,
          epochs=500,
          file_name='DefaultFileName',
          charge=None,
          save=True,
          batch_size=64,
          epochs_for_saving=3):

    # Establishing the device
    device = 'cuda:' + str(gpu) if torch.cuda.is_available() else 'cpu'
    if device == 'cpu':
        warnings.warn(message="Executing on CPU!", category=ResourceWarning)

    # Generating the model
    # Change this line to create a different model
    model = models.ThreeLayerSigmoidRegressor()
    if charge is not None:
        model = load_model(model=model, file_name=charge)
    model = model.to(device)

    #Training Parameters
    criterion = RMSLELoss()  #nn.MSELoss()#nn.L1Loss()
    optimizer = optim.Adam(
        model.parameters(),
        lr=lr)  # optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    #Generating the Dataset
    dataset = ASHRAEDataset(erase_nans=False)
    train_len, validation_len = int(math.ceil(0.95 * len(dataset))), int(
        math.ceil(0.04 * len(dataset)))
    train, validation, test = data.random_split(
        dataset,
        (train_len, validation_len, len(dataset) - train_len - validation_len))
    #Pass to Dataloader for reading batches
    train = DataLoader(train,
                       batch_size=batch_size,
                       shuffle=True,
                       num_workers=1,
                       pin_memory=True)
    validation = DataLoader(validation,
                            batch_size=batch_size,
                            shuffle=True,
                            num_workers=1,
                            pin_memory=True)

    # Uncomment to inspect the Pearson correlations
    #pearsons_of_each_variable(data=train)

    # Writer for plotting graphs in TensorBoard
    writer = SummaryWriter(comment=file_name)
    print('Starting the training...')
    print("Batch Size: " + str(batch_size))
    print("Running in: " + device)

    for i in range(epochs):
        train_step(model=model,
                   data=train,
                   criterion=criterion,
                   optimizer=optimizer,
                   epoch=i,
                   device=device,
                   writer=writer,
                   verbose=True)
        validation_step(model=model,
                        data=validation,
                        criterion=criterion,
                        epoch=i,
                        device=device,
                        writer=writer)
        if save and i % epochs_for_saving == 0:
            writer.flush()
            model = model.cpu()
            save_model(model=model, file_name=file_name)
            model = model.to(device)
    writer.close()
Example #19
    np.random.seed(117)      # set numpy seed to get consistent data
    dataGen = GenerateARXData(noise_form=noise_form)
    X, Y, E = dataGen(N, 1)

    # Scale the data
    scale = Y.max()
    X = X / scale
    Y = Y / scale
    E = E/scale

    # simulate test data set
    X_test, Y_test, _ = dataGen(N_test, 1)
    X_test = X_test/scale
    Y_test = Y_test/scale

    net = Models.EBM_ARX_net(feature_net_dim=hidden_dim,predictor_net_dim=hidden_dim, decay_rate=0.99, num_epochs=150, use_double=False)
    net.fit(X, Y)
    training_losses = net.training_losses

    plt.plot(training_losses)
    plt.title('Training loss')
    plt.xlabel('epoch')
    plt.show()

    # make baseline predictions of test data set using least squares
    estim_param, _resid, _rank, _s = linalg.lstsq(X, Y)
    mse_baseline = np.mean((X_test @ estim_param - Y_test) ** 2)


    # # make predictions of test data set using trained EBM NN
    yhat, prediction_scores = net.predict(X_test)
Example #20
### parse the full xml files and extract the relevant data

import json
import xml.etree.ElementTree as ET  # assuming the standard-library parser

import Models

print('Parsing courses...')
data = ET.parse('data/courses_full.xml').getroot()

print(str(len(data)) + ' courses found')
courses = []
    
for course in data.findall('Courses/FullXML/Course'):
    title = course.find('Title[@Lang="en-GB"]').attrib.get('Title')
    ects = course.find('Point').text
    lang = course.find('Teaching_Language').attrib.get('LangCode')
    prereqs = []
    prereq_node = course.find('Qualified_Prerequisites/Qualified_Prerequisites_Txt[@Lang="en-GB"]')
    if prereq_node is not None:
        prereqs = Models.parse_prereqs(prereq_node.attrib.get('Txt'))
            
    course = Models.Course(title,
                           course.attrib.get('CourseID'),
                           course.attrib.get('CourseCode'),
                           prereqs,
                           ects,
                           lang)

    courses.append(course)

dicts = [c.__dict__ for c in courses]
with open('data/courses_full.json', 'w') as f:
    f.write(json.dumps(dicts) + '\n')
Example #21
test_random_indices_poly = random_indices_poly[train_split_poly:]

# Class for loading Polygons sequence from a sequence folder
denoise_generator_poly = GP.DenoiseHPatchesPoly_Exp6(
    random_indices_poly=train_random_indices_poly,
    inputs=Inputs,
    labels=Labels,
    batch_size=50)
denoise_generator_val_poly = GP.DenoiseHPatchesPoly_Exp6(
    random_indices_poly=test_random_indices_poly,
    inputs=Inputs,
    labels=Labels,
    batch_size=50)

shape = (64, 64, 4)
denoise_model = Models.get_baseline_model(shape)

#   ===================================== Train =====================================

epochs = 1

Train_Denoiser.train_denoiser(denoise_generator_poly,
                              denoise_generator_val_poly, denoise_model,
                              epochs)

#   ===================================== Output results =====================================

imgs, imgs_clean = next(iter(denoise_generator_val_poly))
index = np.random.randint(0, imgs.shape[0])
imgs_den = denoise_model.predict(imgs)
Example #22
    def post(self):
        self.response.out.write("added <br />")
        user = users.get_current_user()
        
        # See if the user chose to upload a payslip or another file
        if self.request.POST['type'] == "payslip":

            self.response.out.write("payslip <br />")
            
            payslip = Models.Payslip(parent=Models.payslip_key(user.user_id()))
             
            #Set the model attributes 
            payslip.ownerId = user.user_id()
            payslip.beginning = datetime.datetime.strptime(self.request.POST['beginning'],'%Y-%m-%d').date()
            payslip.ending = datetime.datetime.strptime(self.request.POST['ending'],'%Y-%m-%d').date()
            payslip.income = float(self.request.POST['income'])
            payslip.tax = float(self.request.POST['tax'])
            payslip.net = float(self.request.POST['income']) - float(self.request.POST['tax'])
            payslip.company = self.request.POST['company']
            
            # Create the file
            file_name = files.blobstore.create(mime_type='application/octet-stream')
           
            
            # Open the file and write to it
            with files.open(file_name, 'a') as f:
                f.write("data")
            
            # Finalize the file. Do this before attempting to read it.
            files.finalize(file_name)
            
            # Get the file's blob key
            payslip.file_key = files.blobstore.get_blob_key(file_name)
            
            #Output the given form to the confirmation page
            self.response.out.write(payslip.ownerId + "<br/>" )
            self.response.out.write(str(payslip.upload_date) + "<br/>")
            self.response.out.write(str(payslip.beginning) + "<br/>")
            self.response.out.write(str(payslip.ending) + "<br/>")
            self.response.out.write(str(payslip.income) + "<br/>")
            self.response.out.write(str(payslip.tax) + "<br/>")
            self.response.out.write(str(payslip.net) + "<br/>")
            self.response.out.write(str(payslip.company) + "<br/>")
            
            #add the model to the data store
            payslip.put()             
            self.redirect('/payslips')
           
             
        else:
            self.response.out.write("other <br />")
            
            file = Models.File(parent=Models.file_key(user.user_id()))
            
            #Set the model attributes 
            file.ownerId = user.user_id()
            file.title = self.request.POST['title']
            file.description = self.request.POST['description']


            form_data = cgi.FieldStorage()
            
            if form_data.getvalue('file'):
                subject = form_data.getvalue('file')
                # Create the file
                file_name = files.blobstore.create(mime_type='application/octet-stream')
                
                # Open the file and write to it
                with files.open(file_name, 'a') as f:
                    f.write(subject)
                
                # Finalize the file. Do this before attempting to read it.
                files.finalize(file_name)
                
                # Get the file's blob key
                file.file_key = files.blobstore.get_blob_key(file_name)
               
            else:
               subject = "Not set"
               
            
            #Output the given form to the confirmation page
            self.response.out.write( file.ownerId + "<br/>" )
            self.response.out.write(file.title + "<br/>")
            self.response.out.write(file.description + "<br/>")
            
            #add the model to the data store
            file.put()
            self.redirect('/files')
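The handler above only stores each blob's key; a hedged sketch of how such a blob could later be streamed back with the legacy App Engine Blobstore download handler (the handler name and URL route are hypothetical, not part of the source):

from google.appengine.ext.webapp import blobstore_handlers

class ServeFileHandler(blobstore_handlers.BlobstoreDownloadHandler):
    # Hypothetical handler: streams a stored blob back to the client by its key.
    def get(self, blob_key):
        self.send_blob(blob_key)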
Example #23
0
def validate_unet(train_dataset,
                  val_dataset=None,
                  net_depth=4,
                  val_steps=100,
                  loss=None,
                  tag=''):
    """Run full volume CPU validation on both training and validation."""
    if val_dataset is None:
        val_dataset = train_dataset
    Logger.info("Validate unet predictions for training dataset %s on %s" %
                (train_dataset.name, val_dataset.name))

    with tf.device('/cpu:0'):
        model = Models.UNet(train_dataset.n_classes,
                            depth=net_depth,
                            n_channels=train_dataset.n_modalities)

    loaddir = Tools.get_dataset_savedir(train_dataset, loss)
    weights_file = '%s/best_weights.h5' % loaddir
    secondary_weights_file = '%s/weights.h5' % loaddir

    if loss is None:
        loss = 'sparse_categorical_crossentropy'

    model.compile(loss=loss, optimizer='sgd')

    # tr_gen, val_gen = dataset.get_full_volume_generators(patch_multiplicity=model.patch_multiplicity,
    #                                                      infinite=False)
    val_gen = val_dataset.get_val_generator(
        patch_multiplicity=model.patch_multiplicity, infinite=False)

    if val_dataset is not train_dataset:
        val_gen = BatchGenerator.ModalityFilter(val_gen,
                                                val_dataset.modalities,
                                                train_dataset.modalities)

    load_weights(model, weights_file, secondary_weights_file)

    # for generator in [val_gen]:
    generator = val_gen

    Logger.info('Running validation on %s, trained with %s' %
                (val_dataset.name, train_dataset.name))
    metrics = []
    for _, y_true, y_pred in model.predict_generator(generator,
                                                     steps=val_steps):
        ignore_mask = y_true == -1
        Logger.debug('y_pred labels:', set(y_pred.flat), '- y_true labels:',
                     set(y_true.flat))
        y_true[ignore_mask] = 0
        y_pred[ignore_mask] = 0

        new_metrics = MetricsMonitor.MetricsMonitor.getMetricsForWholeSegmentation(
            y_pred, y_true, labels=range(1, model.n_classes))
        new_metrics = np.squeeze(new_metrics, axis=0)
        # new_metrics = np.nan_to_num(np.squeeze(new_metrics, axis=0))
        metrics.append(new_metrics)

    # Note: this assumes FetcherThread generates images in the order given by paths.
    paths = generator.paths
    metrics = np.array(metrics)
    metric_labels = [
        'Accuracy', 'Sensitivity', 'Specificity', 'Dice', 'Jaccard'
    ]
    df = pd.DataFrame()
    for i, clss in enumerate(val_dataset.classes[1:]):
        tmp_df = pd.DataFrame(metrics[:, i, :], columns=metric_labels)
        tmp_df['Class'] = clss
        tmp_df['Path'] = paths
        df = df.append(tmp_df)
    if tag:
        df['Loss function'] = tag
    Logger.debug('Validation metrics:\n',
                 df.groupby(['Loss function', 'Class']).mean())

    metrics_file = '%s/validation_metrics' % loaddir
    if val_dataset is not train_dataset:
        metrics_file += '_' + val_dataset.name
    metrics_file += '.csv'
    Logger.info('Saving validation metrics to', metrics_file)
    # np.save(metrics_file, np.array(gen_metrics))
    df.to_csv(metrics_file)
Example #24
0
n_hidden = 100                             # Number of hidden units
n_layers = 2                               # Number of LSTM layers
p_dropout = 0.2                            # Probability of dropout
bidir = False                              # Sets if the RNN layer is bidirectional or not

if use_delta_ts == 'normalized':
    # Count the delta_ts column as another feature, only ignore ID, timestamp and label columns
    n_inputs = dataset.n_inputs + 1
elif use_delta_ts == 'raw':
    raise Exception('ERROR: When using a model of type Vanilla RNN, we can\'t use raw delta_ts. Please either normalize it (use_delta_ts = "normalized") or discard it (use_delta_ts = False).')

# Instantiating the model:

model = Models.VanillaRNN(n_inputs, n_hidden, n_outputs, n_layers, p_dropout,
                          embed_features=embed_features, n_embeddings=n_embeddings,
                          embedding_dim=embedding_dim, bidir=bidir)
model

# Define the name that will be given to the models that will be saved:

model_name = 'rnn'
if dataset_mode == 'pre-embedded':
    model_name = model_name + '_pre_embedded'
elif dataset_mode == 'learn embedding':
    model_name = model_name + '_with_embedding'
elif dataset_mode == 'one hot encoded':
    model_name = model_name + '_one_hot_encoded'
if use_delta_ts is not False:
    model_name = model_name + '_delta_ts'
model_name
Example #25
0
def visualize_unet(train_dataset,
                   val_dataset=None,
                   net_depth=4,
                   loss=None,
                   savefile='',
                   plot=False,
                   full_volume=True,
                   skip=0):
    """Compute one MultiUNet prediction and visualize against ground truth."""
    if val_dataset is None:
        val_dataset = train_dataset
    Logger.info(
        "Visualizing unet predictions for training dataset %s on an image from %s"
        % (train_dataset.name, val_dataset.name))

    device = '/cpu:0' if full_volume else '/gpu:0'
    with tf.device(device):
        model = Models.UNet(train_dataset.n_classes,
                            depth=net_depth,
                            n_channels=train_dataset.n_modalities)

    if full_volume:
        generator = val_dataset.get_val_generator(
            batch_size=1,
            patch_multiplicity=model.patch_multiplicity,
            infinite=False)
    else:
        generator = val_dataset.get_val_generator(
            (128, 128, 128),
            transformations=BatchGenerator.Transformations.CROP,
            patch_multiplicity=model.patch_multiplicity,
            batch_size=1,
            infinite=False)
    if val_dataset is not train_dataset:
        generator = BatchGenerator.ModalityFilter(generator,
                                                  val_dataset.modalities,
                                                  train_dataset.modalities)

    savedir = Tools.get_dataset_savedir(train_dataset, loss)
    weights_file = '%s/best_weights.h5' % savedir
    secondary_weights_file = '%s/weights.h5' % savedir

    if loss is None:
        loss = 'sparse_categorical_crossentropy'

    model.compile(loss=loss, optimizer='adam')

    # print(model.summary(line_length=150, positions=[.25, .55, .67, 1.]))

    load_weights(model, weights_file, secondary_weights_file)

    Logger.info('Predicting image', generator.paths[skip])
    for i in range(skip):
        next(generator)
    if full_volume:
        x, y, y_pred = next(model.predict_generator(generator, steps=1))
    else:
        x, y = next(generator)
        y_pred = model.predict(x, modalities=val_dataset.modalities)
    x = x[0, ..., 0]
    y = y[0, ...]
    y_pred = y_pred[0]

    if savefile:
        Helpers.save_predictions(x, y, y_pred, savefile=savefile)
    if plot:
        Helpers.visualize_predictions(x, y, y_pred)
Example #26
0
        train_option['data_name'] = data_name.replace('/', '_')

        if gray_scale:
            train_option['lr'] = [1e-2, 2e-4]
            train_option['l2_reg'] = [1e-4, 5e-4]

        # training and validation data (I: cam-captured envelope; J: hidden content GT)
        train_data = {'I': cam_train[:num_train, :, :, :], 'J': gt_train[:num_train, :, :, :]}
        valid_data = {'I': cam_valid, 'J': gt_valid}

        for degradation in degradation_list:
            # for repeatability
            resetRNGseed(0)

            # WarpingNet for geometric correction
            warping_net = Models.WarpingNet(chan_in=train_data['I'].shape[1], out_size=gt_train.shape[2:4])
            if torch.cuda.device_count() >= 1: warping_net = nn.DataParallel(warping_net, device_ids=device_ids).to(device)

            # Dehazing and RefineNet
            dehazing_refine_net = Models.DehazingRefineNet(chan_in=train_data['I'].shape[1], chan_out=train_data['J'].shape[1], degradation=degradation)
            if torch.cuda.device_count() >= 1: dehazing_refine_net = nn.DataParallel(dehazing_refine_net, device_ids=device_ids).to(device)

            # Neural-STE
            model = Models.NeuralSTE(warping_net, dehazing_refine_net, degradation=degradation)  # with GAN
            if torch.cuda.device_count() >= 1: model = nn.DataParallel(model, device_ids=device_ids).to(device)

            # train
            print('-------------------------------------- Training Options -----------------------------------')
            print("\n".join("{}: {}".format(k, v) for k, v in train_option.items()))
            print('-------------------------------------- Start training {:s} ---------------------------'.format(model.module.name))
Example #27
0
def train_unet(dataset,
               epochs=1,
               steps_per_epoch=200,
               batch_size=7,
               patch_shape=(32, 32, 32),
               net_depth=4,
               loss=None,
               sample_bg=False):
    """Build UNet, load the weights (if any), train, save weights."""
    Logger.info("Training unet on %s" % (dataset.name))
    savedir = Tools.get_dataset_savedir(dataset, loss)
    weights_file = '%s/weights.h5' % savedir
    best_weights_file = '%s/best_weights.h5' % savedir
    epoch_file = '%s/last_epoch.txt' % savedir
    metrics_file = '%s/metrics.csv' % savedir
    full_volume_metrics_file = '%s/full_volume_metrics' % savedir
    tensorboard_dir = '%s/tensorboard' % savedir

    if os.path.isfile(epoch_file):
        with open(epoch_file, 'r') as f:
            initial_epoch = int(f.readline())
    else:
        initial_epoch = 0

    epochs += initial_epoch

    n_classes = dataset.n_classes
    n_channels = dataset.n_modalities

    if loss is None:
        loss = 'sparse_categorical_crossentropy'

    model = Models.UNet(n_classes, depth=net_depth, n_channels=n_channels)

    # print('patch_multiplicity', model.patch_multiplicity)
    # patch_tr_gen = dataset.get_train_generator(patch_shape, batch_size=batch_size)
    # patch_val_gen = val_dataset.get_val_generator(patch_shape=(128, 128, 128))

    patch_tr_gen, patch_val_gen = dataset.get_patch_generators(
        patch_shape, batch_size=batch_size, sample_train_bg=sample_bg)
    # full_tr_gen, full_val_gen = dataset.get_full_volume_generators(model.patch_multiplicity)

    model.compile(
        loss=loss,
        # optimizer='adam',
        optimizer=keras.optimizers.Adam(lr=0.0002),
        metrics=[sparse_categorical_accuracy, Metrics.discrete_mean_dice_coef])

    print(model.summary(line_length=150, positions=[.25, .55, .67, 1.]))

    load_weights(model, weights_file)

    Tools.ensure_dir(savedir)
    model_checkpoint = ModelCheckpoint(weights_file,
                                       monitor='val_loss',
                                       save_best_only=False)
    best_model_checkpoint = ModelCheckpoint(best_weights_file,
                                            monitor='val_loss',
                                            save_best_only=True)

    for file in glob.glob('tensorboard/*'):
        os.remove(file)
    tensorboard = Metrics.TrainValTensorBoard(log_dir=tensorboard_dir,
                                              histogram_freq=0,
                                              write_graph=True,
                                              write_images=True)

    # def sched(epoch, lr):
    #   return lr * .99
    # lr_sched = LearningRateScheduler(sched, verbose=1)

    # full_volume_validation = Metrics.FullVolumeValidationCallback(model,
    #     full_val_gen, metrics_savefile=full_volume_metrics_file, validate_every_n_epochs=10)

    h = model.fit_generator(
        patch_tr_gen,
        steps_per_epoch=steps_per_epoch,
        initial_epoch=initial_epoch,
        epochs=epochs,
        validation_data=patch_val_gen,
        validation_steps=10,
        callbacks=[
            model_checkpoint,
            best_model_checkpoint,
            tensorboard,
            # lr_sched,
            #  full_volume_validation
        ])

    # Write metrics to a csv (append; write the header row only if the file is new).
    keys = sorted(h.history.keys())
    write_header = not os.path.exists(metrics_file)
    with open(metrics_file, 'a') as metrics_f:
        metrics_writer = csv.writer(metrics_f)
        if write_header:
            metrics_writer.writerow(keys)
        metrics_writer.writerows(zip(*[h.history[key] for key in keys]))

    with open(epoch_file, 'w') as f:
        f.write(str(epochs))
    Logger.info("Done")
Example #28
0
    y_cv_cat = np_utils.to_categorical(y_cv.flatten(), modelParams.nb_classes)
    y_cv_cat = y_cv_cat.reshape(
        (y_cv.shape[0], y_cv.shape[1] * y_cv.shape[2], modelParams.nb_classes))

    loss = model.evaluate(x_cv, y_cv_cat, batch_size=modelParams.batchSize)

    logging.info("Loss: {0}".format(str(loss)))

    return model


modelsPath = join(DataTools.inDir, "models")
if not exists(modelsPath):
    makedirs(modelsPath)

model = Models.getUNet(mp.input_shape, mp.nb_classes)
#model = load_model(join(modelsPath, "gnet_gray_test_5.hdf5"))

allTrainIds = DataTools.trainImageIds
trainImages = [
    '6110_3_1', '6100_2_3', '6040_1_3', '6010_4_4', '6140_3_1', '6110_1_2',
    '6060_2_3'
]  # np.random.permutation(allTrainIds)[:7]

# '6040_1_3' - do not use
# '6010_4_4' - do not use

checkpointer = ModelCheckpoint(filepath="unet_weights.{epoch:02d}.hdf5",
                               verbose=1,
                               save_best_only=True)
csv_logger = CSVLogger('training.log')
Example #29
0
    def add_song(self, user):
        user = user + "_playlist"
        playlist_name = "upload"
        Models.add_song_to_playlist(self, playlist_name, user)
Example #30
0
def train(model_name="CNN",
          batch_size=32,
          nb_epoch=2000,
          dataset="mnist",
          optimizer="CDGD",
          nb_agents=5,
          step_eval=20,
          **kwargs):

    paramExpla = [
        "model_name", "optimizer", "dataset", "nb_epoch", "batch_size",
        "nb_agents"
    ] + list(kwargs.keys())
    parameters = [
        model_name, optimizer, dataset, nb_epoch, batch_size, nb_agents
    ] + list(kwargs.values())

    print('\nStarting Process:')
    print(list(zip(paramExpla, parameters)))

    if dataset == "cifar10":
        (X_train, Y_train), (X_test, Y_test) = cifar10.load_data()
    if dataset == "cifar100":
        (X_train, Y_train), (X_test, Y_test) = cifar100.load_data()
    if dataset == "mnist":
        (X_train, Y_train), (X_test, Y_test) = mnist.load_data()
        X_train = X_train.reshape((X_train.shape[0], 28, 28, 1))
        X_test = X_test.reshape((X_test.shape[0], 28, 28, 1))

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255.
    X_test /= 255.

    img_dim = X_train.shape[-3:]
    nb_classes = len(np.unique(Y_train))

    Y_train = np_utils.to_categorical(Y_train, nb_classes)
    Y_test = np_utils.to_categorical(Y_test, nb_classes)

    ins = [X_train, Y_train]
    num_train_samples = ins[0].shape[0]
    agent_data_size = (num_train_samples // nb_agents)

    x_data = {}
    y_data = {}
    x_vali = {}
    y_vali = {}

    for i in range(nb_agents):
        x_data['input_' + str(i + 1)] = X_train[i * agent_data_size:(i + 1) *
                                                agent_data_size]
        y_data['d' + str(i + 1)] = Y_train[i * agent_data_size:(i + 1) *
                                           agent_data_size]
        x_vali['input_' + str(i + 1)] = X_test
        y_vali['d' + str(i + 1)] = Y_test

    test_batch_size = 1024
    if 'test_batch_size' in kwargs:
        test_batch_size = kwargs['test_batch_size']

    trainData = tf.data.Dataset.from_tensor_slices(
        (x_data, y_data)).shuffle(1024).repeat().batch(batch_size).prefetch(1)
    testData = tf.data.Dataset.from_tensor_slices(
        (x_vali, y_vali)).batch(test_batch_size)

    #print(trainData)
    #print(testData)

    lr = 1e-3

    if 'lr' in kwargs:
        lr = kwargs['lr']

        if lr == 'PolynomialDecay':

            if not 'starter_learning_rate' in kwargs:
                raise ValueError(
                    'For TernGrad, must specify the "starter_learning_rate" parameter'
                )

            if not 'decay_steps' in kwargs:
                raise ValueError(
                    'For TernGrad, must specify the "decay_steps" parameter')

            if not 'end_learning_rate' in kwargs:
                raise ValueError(
                    'For TernGrad, must specify the "end_learning_rate" parameter'
                )

            if not 'power' in kwargs:
                raise ValueError(
                    'For TernGrad, must specify the "power" parameter')

            lr = tf.keras.optimizers.schedules.PolynomialDecay(
                kwargs['starter_learning_rate'],
                kwargs['decay_steps'],
                kwargs['end_learning_rate'],
                power=kwargs['power'])

    if nb_agents != 1:

        topology = 'full'
        if 'topology' in kwargs:
            topology = kwargs['topology']

        always_update = False
        if 'always_update' in kwargs:
            always_update = kwargs['always_update']

        big_k = 1
        maxLam = 0.01

        params = Params(nb_agents,
                        big_k,
                        always_update=always_update,
                        topology=topology)

    if optimizer == "CDGD":

        if not 'c1' in kwargs:
            raise ValueError('For CDGD, must specify the "c1" parameter')

        if not 'delta' in kwargs:
            raise ValueError('For CDGD, must specify the "delta" parameter')

        params = Params(nb_agents,
                        big_k,
                        always_update=always_update,
                        topology=topology,
                        maxLam=maxLam)
        opt = CDGD(lr=1E-2,
                   decay=0,
                   nesterov=False,
                   nb_agents=nb_agents,
                   params=params,
                   c1=kwargs['c1'],
                   delta=kwargs['delta'])

    elif optimizer == "QCDGD":

        if not 'c1' in kwargs:
            raise ValueError('For QDGD, must specify the "c1" parameter')

        if not 'clipStd' in kwargs:
            raise ValueError('For QDGD, must specify the "clipStd" parameter')

        if not 'ternSt' in kwargs:
            raise ValueError('For QDGD, must specify the "ternSt" parameter')

        if nb_agents == 1:
            big_k = 1
            maxLam = 0.01
            topology = 'full'
            always_update = False

        params = Params(nb_agents,
                        big_k,
                        always_update=always_update,
                        topology=topology,
                        maxLam=maxLam)
        opt = QCDGD(lr=1E-2,
                    decay=0,
                    nesterov=False,
                    nb_agents=nb_agents,
                    params=params,
                    ternSt=kwargs['ternSt'],
                    clip=kwargs['clipStd'],
                    c1=kwargs['c1'])

    initer = 'glorot_uniform'
    if 'initer' in kwargs:
        initer = kwargs['initer']

    identical = True
    if 'identical' in kwargs:
        identical = kwargs['identical']

    model = Models.load(model_name,
                        img_dim,
                        nb_classes,
                        opt,
                        nb_agents=nb_agents,
                        identical=identical,
                        kernel_initializer=initer)

    # model.summary()
    #stop

    step_list = []
    train_losses, train_accs = [], []
    val_losses, val_accs = [], []

    #data = model.fit(X_agent_ins, Y_agent_ins, validation_data=(x_validation, y_validation), epochs=nb_epoch // step_eval, steps_per_epoch=step_eval, batch_size=batch_size, shuffle=True)
    data = model.fit(trainData,
                     validation_data=testData,
                     epochs=nb_epoch // step_eval,
                     steps_per_epoch=step_eval,
                     batch_size=batch_size,
                     shuffle=True)
    #data = model.fit(generator, validation_data=(x_validation, y_validation), epochs=nb_epoch // step_eval, steps_per_epoch=step_eval, batch_size=batch_size, shuffle=True)

    keys = list(data.history.keys())

    offset = 1
    if nb_agents == 1:
        offset = 0

    train_losses = [
        sum(x) for x in zip(
            *
            [data.history.get(key) for key in keys[offset:offset + nb_agents]])
    ]
    train_accs = [
        sum(x) / nb_agents for x in zip(*[
            data.history.get(key)
            for key in keys[offset + nb_agents:offset + 2 * nb_agents]
        ])
    ]
    val_losses = [
        sum(x) for x in zip(*[
            data.history.get(key)
            for key in keys[2 * offset + 2 * nb_agents:2 * offset +
                            3 * nb_agents]
        ])
    ]
    val_accs = [
        sum(x) / nb_agents for x in zip(*[
            data.history.get(key)
            for key in keys[2 * offset + 3 * nb_agents:2 * offset +
                            4 * nb_agents]
        ])
    ]

    step_list = [*range(step_eval, nb_epoch + 1, step_eval)]
    del model

    return paramExpla, parameters, step_list, train_losses, train_accs, val_losses, val_accs
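A hedged invocation sketch; the c1/delta values and the epoch counts below are placeholders, not taken from the source:

# Hypothetical run: 5 agents with the CDGD optimizer on MNIST.
param_names, param_values, steps, tr_loss, tr_acc, va_loss, va_acc = train(
    model_name="CNN", dataset="mnist", optimizer="CDGD",
    nb_agents=5, nb_epoch=100, batch_size=32, step_eval=20,
    c1=0.1, delta=0.5)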
Example #31
0
def train(setting_dict):
    settings = SegSettings(setting_dict, write_logger=True)
    my_logger(settings.simulation_folder + '\\logger')

    # Initialize model:
    model = models.Unet_2D(
        encoder_name=settings.encoder_name,
        encoder_depth=settings.encoder_depth,
        encoder_weights=settings.encoder_weights,
        decoder_use_batchnorm=settings.decoder_use_batchnorm,
        decoder_channels=settings.decoder_channels,
        in_channels=settings.in_channels,
        classes=settings.classes,
        activation=settings.activation)
    model.cuda(1)
    model = model.double()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=settings.initial_learning_rate)

    # Initialize 'data_dic', nested dictionary, will contain all losses and dice for all organs
    organs = [
        'all_organs', 'spleen', 'prostate', 'lits', 'brain', 'pancreas',
        'hepatic_vessel', 'left_atrial'
    ]
    count_type = ['total_epochs', 'cur']
    loss_type = ['CE', 'bg_dice', 'organ_dice']
    set_type = ['Training', 'Validation']

    # Build data_dic as fully independent nested dicts; each level must get fresh
    # dicts/lists, otherwise all sets, loss types and count types would alias the
    # same inner lists.
    data_dic = {s: {l: {c: {organ: [] for organ in organs}
                        for c in count_type}
                    for l in loss_type}
                for s in set_type}  # this is the final dic
    ## data_dic shape:
    #  {Training:   {CE:         {total_epochs: {all_organs: [], spleen: [], ...},
    #                             cur:          {all_organs: [], spleen: [], ...}},
    #                organ_dice:  {total_epochs: {...}, cur: {...}},
    #                bg_dice:     {total_epochs: {...}, cur: {...}}},
    #   Validation:  the same structure}

    #Initialize datasets
    train_dataset_list = []
    val_dataset_list = []
    for organ in organs[1:]:

        organ_train_dataset = Seg_Dataset(
            organ, settings.definition_file_dir + '/' + organ + '/Training',
            settings.definition_file_dir + '/' + organ + '/Training_Labels', 2,
            settings)
        organ_val_dataset = Seg_Dataset(
            organ, settings.definition_file_dir + '/' + organ + '/Validation',
            settings.definition_file_dir + '/' + organ + '/Validation_Labels',
            2, settings)
        train_dataset_list.append(organ_train_dataset)
        val_dataset_list.append(organ_val_dataset)

    train_dataset = torch.utils.data.ConcatDataset(train_dataset_list)
    val_dataset = torch.utils.data.ConcatDataset(val_dataset_list)
    print(len(train_dataset))

    batch_size = settings.batch_size
    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   num_workers=0)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=batch_size,
                                shuffle=False,
                                num_workers=0)

    print('Training... ')
    num_epochs = 3
    for epoch in range(0, num_epochs):
        epoch_start_time = time.time()
        total_steps = len(train_dataloader)
        for i, sample in enumerate(train_dataloader, 1):
            if i > 50:
                break
            model.train()
            images = sample['image'].double()
            masks = sample['mask'].type(torch.LongTensor)
            masks = masks.unsqueeze(1)
            images = images.to("cuda:1")
            masks = masks.to("cuda:1")
            masks = masks.type(torch.LongTensor)
            masks = masks.cuda(1)

            #Forward pass
            outputs = model(images, sample['task'])
            outputs = outputs.to("cuda:1")
            loss = criterion(outputs.double(), masks[:, 0, :, :])

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(
                f"Epoch [{epoch + 1}/{num_epochs}], Step [{i}/{total_steps}], Loss: {loss.item():4f}",
            )
            logging.info('current task: ' + sample['task'][0])
            logging.info(
                f"Epoch [{epoch + 1}/{num_epochs}], Step [{i}/{total_steps}], Loss: {loss.item():4f}",
            )

            dices = dice(outputs, masks, sample['num_classes'][0], settings)
            background_dice = dices[1]
            organ_dice = dices[2]

            #Update data dic for relevant organ
            data_dic['Training']['organ_dice']['cur'][sample['task']
                                                      [0]].append(organ_dice)
            data_dic['Training']['bg_dice']['cur'][sample['task'][0]].append(
                background_dice)
            data_dic['Training']['CE']['cur'][sample['task'][0]].append(
                loss.item())

            #update data dic [all organ]
            data_dic['Training']['organ_dice']['cur']['all_organs'].append(
                organ_dice)
            data_dic['Training']['bg_dice']['cur']['all_organs'].append(
                background_dice)
            data_dic['Training']['CE']['cur']['all_organs'].append(loss.item())

            if i % 30 == 0:  #save output every 30 batches
                save_output = outputs.cpu().detach().numpy()
                save_samp(sample['image'][0], sample['mask'][0],
                          sample['task'][0], save_output[0][1], epoch, i,
                          settings.snapshot_dir, organ_dice)

            if i % 50 == 0:  #print details every 50 batches
                print(
                    'curr train loss: {}  train organ dice: {}  train background dice: {} \t'
                    'iter: {}/{}'.format(
                        np.mean(
                            data_dic['Training']['CE']['cur']['all_organs']),
                        np.mean(data_dic['Training']['organ_dice']['cur']
                                ['all_organs']),
                        np.mean(data_dic['Training']['bg_dice']['cur']
                                ['all_organs']), i + 1, len(train_dataloader)))
                logging.info(
                    'curr train loss: {}  train organ dice: {}  train background dice: {} \t'
                    'iter: {}/{}'.format(
                        np.mean(
                            data_dic['Training']['CE']['cur']['all_organs']),
                        np.mean(data_dic['Training']['organ_dice']['cur']
                                ['all_organs']),
                        np.mean(data_dic['Training']['bg_dice']['cur']
                                ['all_organs']), i + 1, len(train_dataloader)))

        #Update data_dic['total_epochs']
        for l in loss_type:
            for organ in organs:
                data_dic['Training'][l]['total_epochs'][organ].append(
                    np.mean(data_dic['Training'][l]['cur'][organ]))

        ## Validation
        total_steps = len(val_dataloader)
        for i, data in enumerate(val_dataloader):
            if i > 50:
                break
            model.eval()
            images = data['image'].double()
            masks = data['mask'].type(torch.LongTensor)
            masks = masks.unsqueeze(1)
            images = images.to("cuda:1")
            masks = masks.to("cuda:1")

            outputs = model(images, data['task'])
            outputs = outputs.to("cuda:1")

            loss = criterion(outputs.double(), masks[:, 0, :, :])
            print(
                f"Validation Epoch [{epoch + 1}/{num_epochs}], Step [{i}/{total_steps}], Loss: {loss.item():4f}",
            )
            logging.info('current task: ' + data['task'][0])
            logging.info(
                f"Validation Epoch [{epoch + 1}/{num_epochs}], Step [{i}/{total_steps}], Loss: {loss.item():4f}",
            )

            dices = dice(outputs, masks, data['num_classes'][0], settings)
            background_dice = dices[1]
            organ_dice = dices[2]

            # Update data dic for relevant organ
            data_dic['Validation']['organ_dice']['cur'][data['task'][0]].append(
                organ_dice)
            data_dic['Validation']['bg_dice']['cur'][data['task'][0]].append(
                background_dice)
            data_dic['Validation']['CE']['cur'][data['task'][0]].append(
                loss.item())

            # Update data dic [all organ]
            data_dic['Validation']['organ_dice']['cur']['all_organs'].append(
                organ_dice)
            data_dic['Validation']['bg_dice']['cur']['all_organs'].append(
                background_dice)
            data_dic['Validation']['CE']['cur']['all_organs'].append(
                loss.item())

        # Update data_dic['total_epochs']
        for l in loss_type:
            for organ in organs:
                data_dic['Validation'][l]['total_epochs'][organ].append(
                    np.mean(data_dic['Validation'][l]['cur'][organ]))

        print('End of epoch {} / {} \t Time Taken: {} min'.format(
            epoch, num_epochs, (time.time() - epoch_start_time) / 60))
        print('train loss: {} val_loss: {}'.format(
            np.mean(data_dic['Training']['CE']['cur']['all_organs']),
            np.mean(data_dic['Validation']['CE']['cur']['all_organs'])))
        print(
            'train organ dice: {}  train background dice: {} val organ dice: {}  val background dice: {}'
            .format(
                np.mean(
                    data_dic['Training']['organ_dice']['cur']['all_organs']),
                np.mean(data_dic['Training']['bg_dice']['cur']['all_organs']),
                np.mean(
                    data_dic['Validation']['organ_dice']['cur']['all_organs']),
                np.mean(
                    data_dic['Validation']['bg_dice']['cur']['all_organs'])))
Example #32
0
import webapp2
import jinja2
import os
import Models

from google.appengine.ext import db
from google.appengine.api import users


payslips = db.GqlQuery("SELECT * "
                "FROM Payslip "
                "WHERE ANCESTOR IS :1 ",
                Models.payslip_key(users.get_current_user().user_id()))
    
income = 0
tax = 0
payslip_count = 0            
for payslip in payslips:
    income+= payslip.income
    tax += payslip.tax
    payslip_count += 1
    
employee = Models.Employee()
 
employee.userid = user.user_id()
employee.income = income
employee.tax = tax
employee.net = income - tax
employee.account_type = "employee"

employee.put()
Example #33
0
        if tv_is_completed:
            break

        browser_status = WebInteraction.source_login(source, browser)
        if browser_status is False:
            ActionLog.log('%s could not logon' % source.login_page)
            continue
        else:
            ActionLog.log(
                'Searching via the search form on %s.' % source.domain, db)
            # we invert the search format and check for each show, not each link in the page

        for s in search.shows_to_download:
            ActionLog.log('Searching for %s.' % str(s), db)
            response_links = WebInteraction.source_search(
                source, str(s), browser)
            correct_links = [
                l for l in response_links if s.episode_in_link(l.text.lower())
            ]
            ActionLog.log(
                'Found %s links for %s on %s' %
                (str(len(correct_links)), str(s), source.domain), db)
            search.process_search_result(correct_links, s, browser, source,
                                         config)
            time.sleep(
                15)  # wait fifteen seconds between searches on warez-bb.org


if __name__ == "__main__":
    database = Models.connect()
    search_all(database)
Example #34
0
def lack_ram():   
    processed_test = []
    processed_val = []
    input_var = T.ftensor3('inputs')
    network,train_fn,val_fn,output = Models.bidirectional_LSTM(input_var=input_var,N_HIDDEN=N_HIDDEN,layer=4,vocab=vocab)
    processed = []
    if(os.path.isfile(weights+'.params')):
        print("loading Weights")
        params.read_model_data(network, weights)
    if(os.path.isfile('stored_batch.p')!=True):
        if(os.path.isfile('stored_processed.p')!=True):
            print('Creating processed sentences file')
            print('Loading english and french data files')
            english_set = pd.read_csv('../data/processed_en',header=None,delimiter=',',names=['english','en_length'])
            french_set = pd.read_csv('../data/processed_fr',header=None,delimiter=',',names=['french','fr_length'])
            print('Combining the files')
            combined_set = pd.concat([english_set,french_set],axis=1)
            print('Removing Duplicates')
            print(len(combined_set['french']))
            combined_set = combined_set.drop_duplicates()
            print(len(combined_set['french'])) 
            print('Grouping sentences together by input and output sentence length')
            processed = create_training_set(combined_set,BATCH_SIZE,BATCH_SIZE)
            print('Store batches in a pickle file')
            pickle.dump(processed,open('stored_processed.p','wb'))
            gc.collect()
        else:
            print('Loading grouped sentences')
            processed = pickle.load(open('stored_processed.p','rb'))
            print('number of grouped sentences',len(processed))
        #print('Creating matrix file for grouped sentences')
        gc.collect()
        #pool = mp.Pool(processes=2)
        #processed_batch = [pool.apply_async(helpers.convert_to_vector,args=(batch,french_vocab,char_to_ix)) for batch in processed]
        #processed_batch = [p.get() for p in processed_batch]
        #for batch in processed:
        #    processed_batch.append(helpers.convert_to_vector(batch,french_vocab,char_to_ix))
        #print(len(processed_batch))
        #print('Dumping matrix data to file')
        #pickle.dump(processed_batch,open('stored_batch.p','wb'))
    else:
        print('Loading input and output matrix file')
        processed_batch = pickle.load(open('stored_batch.p','rb'))
    #print(ix_to_char)
    print("Shuffle and set validation set")
    shuffle(processed) #Shuffle Batches
    processed_test = processed[:len(processed)-50]
    processed_val = processed[len(processed)-50:]
    #processed_test = processed[:1]
    #processed_val = processed[501:502]
    for i in range(epoch):
        shuffle(processed_test)
        train_main_b = 0
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in processed_test:           
            curr_batch = helpers.convert_to_vector(batch,french_vocab,char_to_ix)
            fr,eng = helpers.shift_to_input(curr_batch,0,ix_to_vector)
            train_err += train_fn(fr,eng[:,0])
            train_batches += 1
            train_main_b += 1
            print("new batch ",train_main_b,len(processed_test))
            if(train_main_b % 2000 == 0):
                print("saving model",train_main_b)
                params.write_model_data(network, weights)
            for word in range(1,curr_batch[1].shape[1]-1):
                #print(word)
                #print(T.argmax(lasagne.layers.get_output(network,fr,allow_input_downcast=True),axis=1).eval())
                #eng[:,0] = T.argmax(lasagne.layers.get_output(network,fr,allow_input_downcast=True),axis=1).eval().transpose()
                fr,eng = helpers.shift_to_input([fr,eng],word,ix_to_vector)
                train_err += train_fn(fr,eng[:,0])
                train_batches += 1

        #params.write_model_data(network, weights)
        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in processed_val:
            curr_batch = helpers.convert_to_vector(batch,french_vocab,char_to_ix)
            fr,eng = helpers.shift_to_input(curr_batch,0,ix_to_vector)
            error,acc = val_fn(fr,eng[:,0])
            val_err += error
            val_acc += acc
            val_batches += 1
            for word in range(1,curr_batch[1].shape[1]-1):
                #eng[:,0] = T.argmax(lasagne.layers.get_output(network,fr,allow_input_downcast=True),axis=1).eval().transpose()
                fr,eng = helpers.shift_to_input([fr,eng],word,ix_to_vector)
                error,acc = val_fn(fr,eng[:,0])
                val_err += error
                val_acc += acc
                val_batches += 1

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            i, epoch, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))
        params.write_model_data(network, weights)
Example #35
0
            finally:
                signal.alarm(0)
            return res

        return wraps(func)(wrapper)

    return decorator


df = extract(range(2016, 2021), 'ES')
dd = Description.Data(df)
dmp = Decompose(df)

seconds = 180
methods = ['nm', 'bfgs', 'lbfgs']
model = Models.Arima(df, 0.8)


@timeout(seconds, os.strerror(errno.ETIMEDOUT))
def fit(kw, method):
    start_time = time.time()
    model.fit(kw, method)
    error = mean_squared_error(model.time_series(kw, False), model.predict())
    print(
        f"Method: {method}, MSE: {error}, AIC: {model.aic}, Time: {time.time() - start_time} seconds"
    )


for method in methods:
    try:
        fit('jamon', method)
    except Exception as exc:  # assumed handler: the except clause is missing from this snippet
        print(f'{method} failed: {exc}')
Example #36
0
def eICU_model_creator(config):
    """Constructor function for the model(s) to be optimized.

    You will also need to provide a custom training
    function to specify the optimization procedure for multiple models.

    Args:
        config (dict): Configuration dictionary passed into ``TorchTrainer``.

    Returns:
        One or more torch.nn.Module objects.
    """
    model_class = config.get('model', 'VanillaRNN')
    if model_class == 'VanillaRNN':
        return Models.VanillaRNN(
            config.get('n_inputs', 2090),
            config.get('n_hidden', 100),
            config.get('n_outputs', 1),
            config.get('n_rnn_layers', 2),
            config.get('p_dropout', 0.2),
            bidir=config.get('bidir', False),
            total_length=config.get('total_length', None),
            embed_features=config.get('embed_features', None),
            n_embeddings=config.get('n_embeddings', None),
            embedding_dim=config.get('embedding_dim', None))
    elif model_class == 'VanillaLSTM':
        return Models.VanillaLSTM(
            config.get('n_inputs', 2090),
            config.get('n_hidden', 100),
            config.get('n_outputs', 1),
            config.get('n_rnn_layers', 2),
            config.get('p_dropout', 0.2),
            bidir=config.get('bidir', False),
            total_length=config.get('total_length', None),
            embed_features=config.get('embed_features', None),
            n_embeddings=config.get('n_embeddings', None),
            embedding_dim=config.get('embedding_dim', None))
    elif model_class == 'TLSTM':
        return Models.TLSTM(config.get('n_inputs', 2090),
                            config.get('n_hidden', 100),
                            config.get('n_outputs', 1),
                            config.get('n_rnn_layers', 2),
                            config.get('p_dropout', 0.2),
                            embed_features=config.get('embed_features', None),
                            n_embeddings=config.get('n_embeddings', None),
                            embedding_dim=config.get('embedding_dim', None),
                            elapsed_time=config.get('elapsed_time', None))
    elif model_class == 'MF1LSTM':
        return Models.MF1LSTM(config.get('n_inputs', 2090),
                              config.get('n_hidden', 100),
                              config.get('n_outputs', 1),
                              config.get('n_rnn_layers', 2),
                              config.get('p_dropout', 0.2),
                              embed_features=config.get(
                                  'embed_features', None),
                              n_embeddings=config.get('n_embeddings', None),
                              embedding_dim=config.get('embedding_dim', None),
                              elapsed_time=config.get('elapsed_time', None))
    elif model_class == 'MF2LSTM':
        return Models.MF2LSTM(config.get('n_inputs', 2090),
                              config.get('n_hidden', 100),
                              config.get('n_outputs', 1),
                              config.get('n_rnn_layers', 2),
                              config.get('p_dropout', 0.2),
                              embed_features=config.get(
                                  'embed_features', None),
                              n_embeddings=config.get('n_embeddings', None),
                              embedding_dim=config.get('embedding_dim', None),
                              elapsed_time=config.get('elapsed_time', None))
    else:
        raise Exception(
            f'ERROR: {model_class} is an invalid model type. Please specify either "VanillaRNN", "VanillaLSTM", "TLSTM", "MF1LSTM" or "MF2LSTM".'
        )
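A minimal usage sketch; the config values are illustrative and simply mirror the fallback defaults shown above:

# Hypothetical config: anything omitted falls back to the defaults in eICU_model_creator.
config = {'model': 'VanillaLSTM', 'n_inputs': 2090, 'n_hidden': 100, 'n_outputs': 1}
model = eICU_model_creator(config)
print(model)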
Example #37
0
def main(sids,logger):
    SuiteList = DataProvider.getCaseData(logger,sids)
    logger.debug("run SuiteIDs:",SuiteList.keys())
    report = {}
    # report {sid:{status:pass/fail,cost:time,detail:{case:pass/fail/norun}}} **update 2016-2-16
    conf=EnvInit.config()
    # print conf.host
    # sys.exit()
    for sid in SuiteList.keys():
        logger.debug("++++++"+sid+"++++++"+"begin")
        begintime=time.time()
        report[sid]={}
        for case in SuiteList[sid]:
            for pk in case.param.keys():
                if case.param[pk].startswith('$$'):
                    logger.debug('debug main ',case.param[pk])
                    tmpList = case.param[pk][2:].split('.')
                    tmpSid = tmpList[0]
                    tmpCid = tmpList[1]
                    tmpAttrList,tmpFun = getAttrList(case.param[pk],re.compile('\[(.+?)\]'))
                    for tc in SuiteList[sid]:
                        if tc.cid ==tmpCid and tc.sid == tmpSid:
                            case.param[pk]=tc.getResValue(tmpAttrList)
                            logger.debug( "main ...................... update ",case.__hash__())

                tc=Models.contain(case.param[pk],Models.RESERVEDWORD.keys())
                if tc is not None:
                    case.param[pk]=Models.RESERVEDWORD[tc](case.param[pk])
            logger.debug("main param.....",case.cid,case.sid,case.param)
            r,c = request(case,conf,logger)
            logger.debug("main response..",c)
            if r['status']!='200':
                report[sid]={'status':False,
                             'cost':time.time()-begintime,
                             'detail':{(case.cid,case.sid):False}}
                break
            case.res = c
            assertobj = AssertMain(c,case.asex,case.param,logger)
            logger.debug("main assertobj...",assertobj)
            if assertobj['status']:
                report[sid]['status']=True
                if report[sid].keys().count('detail')>0:
                    report[sid]['detail'][(case.cid,case.sid)]=assertobj
                else:
                    report[sid]['detail']={(case.cid,case.sid):assertobj}
            else:
                report[sid]['status']=False
                report[sid]['cost']=time.time()-begintime
                report[sid]['detail'][(case.cid,case.sid)]=assertobj
                break
            if case.otherAction!='':
                logger.debug( case.otherAction)
                eval(case.otherAction)
        if report[sid].keys().count('status')==0:
            report[sid]['status']=True
            report[sid]['cost']=time.time()-begintime
        logger.debug("++++++",sid,"++++++","end")
    logger.debug( report)
    logger.debug("dump report file begin")
    GenerateReport.Report(report)
    logger.debug("dump report file end")
    def get(self):
        
        user = users.get_current_user() 
        
        if user:
            
            pasyer_users = db.GqlQuery("SELECT * "
                "FROM Site_User "
                "WHERE userid = :1 ",
                user.user_id())
            
            current_user = False
            for pasyer_user in pasyer_users:
                current_user = pasyer_user

            if not current_user:
                self.redirect("/settings")
            else:
            
                payslips = db.GqlQuery("SELECT * "
                    "FROM Payslip "
                    "WHERE ANCESTOR IS :1 ",
                    Models.payslip_key(user.user_id()))
        
                income = 0
                tax = 0
                payslip_count = 0            
                for payslip in payslips:
                    income+= payslip.income
                    tax += payslip.tax
                    payslip_count += 1
                    
                files = db.GqlQuery("SELECT * "
                    "FROM File "
                    "WHERE ANCESTOR IS :1 ",
                    Models.file_key(user.user_id()))
                file_count = 0    
                for file in files:
                    file_count += 1
                    
    
                #set stylesheets needed per page 
                specific_urls = """
                    <link type="text/css" rel="stylesheet" href="/stylesheets/""" + self.__class__.__name__ + """.css" />
                """
                
                dashboard_template_values = {
                    'name': current_user.name,
                    'email': current_user.email,
                    'account_type': current_user.account_type,
                    'payslip_quantity': payslip_count,
                    'file_quantity': file_count,
                    'income': income,
                    'tax': tax,
                    'net': income - tax
                }
                
                template = jinja_environment.get_template('Page_Content/dashboard.html')
                dashboard_template = template.render(dashboard_template_values)
                
                url = users.create_logout_url(self.request.uri)
                nav = """
                <nav>
                    <ul>
                        <li><a href="/dashboard">Dashboard</a></li>
                        <li><a href="#">Design</a></li>
                        <li><a href="#">About</a></li>
                        <li><a href="%s">Logout</a></li>
                    </ul>
                </nav>
                """ % url
               
                    
                template_values = {
                    'specific_urls':specific_urls,
                    'nav': nav,
                    'content': dashboard_template
                }
               
                template = jinja_environment.get_template('index.html')
                self.response.out.write(template.render(template_values))
        else:
            self.redirect('/')
Example #39
0
import Models

print(Models.Add(3, 56))
print(Models.Sub(90, 56))
print(Models.Modulus(3, 2))
print(Models.division(9, 3))
Example #40
0
    def get(self):
        if users.get_current_user():
            # GQL query to get the payslips from the datastore
            payslips = db.GqlQuery(
                "SELECT * " "FROM Payslip " "WHERE ANCESTOR IS :1 ",
                Models.payslip_key(users.get_current_user().user_id()),
            )

            list1 = [0, 0, 0, 0]
            list2 = [0, 0, 0, 0]

            # Set the list to the first 4 payslips income and tax for the graph
            index = 0
            for payslip in payslips:
                if index >= 4:
                    break
                list1[index] = payslip.income
                list2[index] = payslip.tax
                index += 1

            # set specific stylesheets and scripts needed per page
            # set the values for the graph
            specific_urls = (
                """
                <link type="text/css" rel="stylesheet" href="/stylesheets/"""
                + self.__class__.__name__
                + """.css" />
                <script type="text/javascript" src="https://www.google.com/jsapi"></script>
                <script type="text/javascript">
                  google.load("visualization", "1", {packages:["corechart"]});
                  google.setOnLoadCallback(drawChart);
                  function drawChart() {
                    var data = google.visualization.arrayToDataTable([
                      ['Payslip', 'Income', 'Tax'],
                      ['1',  """
                + str(list1[0])
                + """,      """
                + str(list2[0])
                + """],
                      ['2',  """
                + str(list1[1])
                + """,      """
                + str(list2[1])
                + """],
                      ['3',  """
                + str(list1[2])
                + """,      """
                + str(list2[2])
                + """],
                      ['4',  """
                + str(list1[3])
                + """,      """
                + str(list2[3])
                + """]
                    ]);
            
                    var options = {
                      title: 'Income Breakdown',
    
                    };
            
                    var chart = new google.visualization.AreaChart(document.getElementById('chart-div'));
                    chart.draw(data, options);
                  }
                </script>
            """
            )

            # Set the nested template values to the generate payslip function
            payslip_template_values = {"payslips": generate_payslip_html(self, payslips)}

            template = jinja_environment.get_template("Page_Content/payslips.html")
            payslip_template = template.render(payslip_template_values)

            myFile = open("Page_Content/payslips.html", "r")

            # Create a log out url for the user
            url = users.create_logout_url(self.request.uri)
            # set the nav list
            nav = (
                """
            <nav>
                <ul>
                    <li><a href="/dashboard">Dashboard</a></li>
                    <li><a href="/design">Design</a></li>
                    <li><a href="/about">About</a></li>
                    <li><a href="%s">Logout</a></li>
                </ul>
            </nav>
            """
                % url
            )

            template_values = {"specific_urls": specific_urls, "nav": nav, "content": payslip_template}

            template = jinja_environment.get_template("index.html")
            self.response.out.write(template.render(template_values))
        else:
            self.redirect("/")
def main(args=None):
    # Setting up configuration TODO: use docopt
    if(args):
        configuration_string = args[0]
    else:
        configuration_string = "Configuration/default.conf"

    config = ConfigParser.ConfigParser()
    config.read(configuration_string)
    rawdata_directory = config.get("Directories", "dir_rawdata")
    storedmodel_directory = config.get("Directories", "dir_storedmodel")
    cluster_json_directory = config.get("Directories", "dir_clusters")

    # Parameters TODO: set these in config file
    # Clustering
    cluster_feature_names = ["StartLat", "StartLong", "EndLat", "EndLong"]  # Which features to cluster over
    clustering_alg = "KMeans"                                    # Which Clustering Algorithm to use
    cluster_model_file = config.get("Batch", "cluster_class_file")
    cluster_params = {"max_clusters": 10, "n_init": 10}           # Parameters for KMeans

    # Initial Classification
    init_class_alg = "RandomForest"
    init_class_model_file = config.get("Batch", "init_class_file")
    init_class_feature_names = ["StartLat", "StartLong"]  # Which features to cluster over

    # Online Classification
    online_class_alg = "RandomForest"
    online_class_model_file = config.get("Batch", "online_class_file")
    online_class_feature_names = ["Latitude", "Longitude", "StartLat", "StartLong"]

    # Read in batch data
    raw_data = sc.textFile(rawdata_directory)
    json_data = raw_data.map(lambda x: json.loads(x))
    pair_rdd = json_data.filter(bool).map(lambda x: (x["journey_id"], x))

    raw_journeys = pair_rdd.combineByKey(lambda value: [value], lambda acc, value: acc + [value], lambda acc1, acc2: add(acc1, acc2))
    processed_journeys = raw_journeys.mapValues(lambda x: Data.load_batch_data(x))

    journeys = processed_journeys.map(lambda x: (x[1].data["vin"][0], x[1]))
    journeys_by_vin = journeys.combineByKey(lambda value: [value], lambda acc, value: acc + [value], lambda acc1, acc2: add(acc1, acc2))

    # Build, assign and save clusters
    journeys_with_id = journeys_by_vin.mapValues(lambda data: Models.cluster(clustering_alg, cluster_feature_names, cluster_params, data))
    journeys_with_id.persist()
    journey_clusters = journeys_with_id.mapValues(lambda journeys: Data.create_journey_clusters(journeys)).persist()
    journey_clusters_local = journey_clusters.collectAsMap()
    joblib.dump(journey_clusters_local, storedmodel_directory + cluster_model_file + "_JourneyClusters")
    cluster_json = journey_clusters.map(Data.extract_journey_json).collect()
    with open(cluster_json_directory + "clusters.json", "w") as f:
        for cluster in cluster_json:
            f.write(cluster + "\n")
    journey_clusters.unpersist()

    # Build initial classification models
    init_class_models = journeys_with_id.mapValues(lambda data: Models.train_init_class_model(init_class_alg, init_class_feature_names, data)).collectAsMap()
    joblib.dump(init_class_models, storedmodel_directory + init_class_model_file)

    # Build online classification models
    online_class_models = journeys_with_id.mapValues(lambda data: Models.train_online_class_model(online_class_alg, online_class_feature_names, data)).collectAsMap()
    joblib.dump(online_class_models, storedmodel_directory + online_class_model_file)

    sc.stop()
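
# A minimal sketch of the combineByKey grouping pattern used twice above: the
# (create, merge-value, merge-combiners) triple simply builds a per-key list of
# records. Assumes pyspark is installed; the toy data below is illustrative.
from operator import add
from pyspark import SparkContext

demo_sc = SparkContext("local[1]", "combineByKey-sketch")
pairs = demo_sc.parallelize([("vin_a", {"t": 1}), ("vin_a", {"t": 2}), ("vin_b", {"t": 3})])
grouped = pairs.combineByKey(lambda v: [v],             # start a list for a new key
                             lambda acc, v: acc + [v],  # append within a partition
                             lambda a, b: add(a, b))    # concatenate across partitions
print(grouped.collectAsMap())  # per-key lists, e.g. {'vin_a': [{'t': 1}, {'t': 2}], ...}
demo_sc.stop()
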
title_str = '{:30s}{:<30}{:<20}{:<15}{:<15}{:<15}{:<15}{:<15}{:<15}{:<15}{:<15}{:<15}\n'
log_file.write(
    title_str.format('data_name', 'model_name', 'loss_function', 'num_train',
                     'batch_size', 'max_iters', 'uncmp_psnr', 'uncmp_rmse',
                     'uncmp_ssim', 'valid_psnr', 'valid_rmse', 'valid_ssim'))
log_file.close()

# resize the input images if input_size is not None
input_size = None
# input_size = (256, 256)  # a lower-resolution input reduces memory usage and speeds up training/testing, at some cost in precision
resetRNGseed(0)

# create a CompenNeSt
# load pre-trained CompenNeSt on Blender dataset
ckpt_file = '../../checkpoint/blender_pretrained_CompenNeSt_l1+ssim_50000_32_20000_0.0015_0.8_2000_0.0001_20000.pth'
compen_nest = Models.CompenNeSt()
if torch.cuda.device_count() >= 1:
    compen_nest = nn.DataParallel(compen_nest,
                                  device_ids=device_ids).to(device)
compen_nest.load_state_dict(torch.load(ckpt_file))
compen_nest.device_ids = device_ids

# stats for different setups
for data_name in data_list:
    # load training and validation data
    data_root = fullfile(dataset_root, data_name)
    cam_surf, cam_train, cam_valid, prj_train, prj_valid, mask_corners = loadData(
        dataset_root, data_name, input_size, CompenNeSt_only=False)

    # surface image for training and validation
    cam_surf_train = cam_surf.expand_as(cam_train)
Example #43
0
# transform skewed data
skewed = ['capital-gain', 'capital-loss']
features_log_transformed = pd.DataFrame(data = features_raw)
features_log_transformed[skewed] = features_raw[skewed].apply(lambda x: np.log(x + 1))

# Normalize numerical features
scaler = MinMaxScaler() # default=(0, 1)
numerical = ['age', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']
features_log_minmax_transform = pd.DataFrame(data = features_log_transformed)
features_log_minmax_transform[numerical] = scaler.fit_transform(features_log_transformed[numerical])

# One-hot encode categorical features
features_final = pd.get_dummies(features_log_minmax_transform)
income = income_raw.map({'>50K': 1, '<=50K': 0})

# Shuffle and Split data
X_train, X_test, y_train, y_test = train_test_split(features_final,
                                                    income,
                                                    test_size = 0.2,
                                                    random_state = 0)

# Evaluate Model Performance with fbeta = 0.5
fbeta = 0.5
best_clf = Models.evaluate_models(X_train,y_train,X_test,y_test,fbeta)
print("\n",best_clf.__class__.__name__)
best_clf = Models.optimize_best_model(best_clf,X_train,y_train,X_test,y_test)
model_predictions = best_clf.predict(X_test)
print("Final accuracy score on the testing data: {:.4f}".format(accuracy_score(y_test, model_predictions)))
print("Final F-score on the testing data: {:.4f}".format(fbeta_score(y_test, model_predictions, beta = 0.5)))
Example #44
0
    def __init__(self, device):
        self.device = device
        self.model = Models.ResNet()
        self.model.to(self.device)
        self.model.load_state_dict(torch.load("C:/Users/Joab-PC/Desktop/FYP/GUI/DANN/DANN_ResNet_Darkdata_100e.pth", map_location="cuda:0"))
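
# A minimal variant of the load above for a CPU-only machine; the checkpoint
# path is shortened here for illustration, and Models.ResNet is the same class
# used in the example.
import torch
import Models

cpu_model = Models.ResNet()
cpu_state = torch.load("DANN_ResNet_Darkdata_100e.pth", map_location="cpu")
cpu_model.load_state_dict(cpu_state)
cpu_model.eval()  # switch to inference mode
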
Example #45
0
    def initialize_from_name(self):
        """ Try to initialize a monster object from a string. Lots of craziness
        here to protect the users from themselves.

        Note also that we're overriding a method from Models.py with this!
        """

        # sanity warning
        if "_" in self.name:
            self.logger.warn(
                "Asset name '%s' contains underscores. Names should use whitespace."
                % self.name)
            self.logger.warn("Attempting to initialize by handle...")
            self.handle = self.name
            self.initialize_from_handle()
            return True

        # first, check for an exact name match (long-shot)
        asset_dict = self.assets.get_asset_from_name(self.name)
        if asset_dict is not None:
            self.initialize_asset(asset_dict)
            return True

        # next, split to a list and try to set asset and level
        name_list = self.name.split(" ")

        # accept any int in the string as the level
        for i in name_list:
            if i.isdigit():
                setattr(self, "level", int(i))

        # now iterate through the list and see if we can get a name from it
        for i in range(len(name_list)):
            parsed_name = " ".join(name_list[:i])
            asset_dict = self.assets.get_asset_from_name(parsed_name)
            if asset_dict is not None:
                self.initialize_asset(asset_dict)
                if len(name_list) > i and name_list[i].upper() not in [
                        "LEVEL", "LVL", "L"
                ]:
                    setattr(self, "comment", (" ".join(name_list[i:])))
                return True

        # finally, create a list of misspellings and try to get an asset from that
        #   (this is expensive, so it's a last resort)
        m_dict = {}
        for asset_handle in self.assets.get_handles():
            asset_dict = self.assets.get_asset(asset_handle)
            if "misspellings" in asset_dict.keys():
                for m in asset_dict["misspellings"]:
                    m_dict[m] = asset_handle

        for i in range(len(name_list) + 1):
            parsed_name = " ".join(name_list[:i]).upper()
            if parsed_name in m_dict:
                asset_handle = m_dict[parsed_name]
                self.initialize_asset(self.assets.get_asset(asset_handle))
                if len(name_list) > i and name_list[i].upper() not in [
                        "LEVEL", "LVL", "L"
                ]:
                    setattr(self, "comment", (" ".join(name_list[i:])))
                return True

        # if we absolutely cannot work out which monster name this is, give up
        #   and raise a Models.AssetInitError()
        if self.handle is None:
            raise Models.AssetInitError(
                "Asset name '%s' could not be translated to an asset handle!" %
                self.name)
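
# A stripped-down sketch of the prefix-matching idea above: split the name on
# whitespace, pull out any integer as the level, then join progressively longer
# prefixes until one matches a known asset name. The names below are
# hypothetical stand-ins for the real asset collection.
KNOWN_ASSET_NAMES = {"Dire Wolf", "Cave Bear"}

def parse_asset_name(raw_name):
    tokens = raw_name.split(" ")
    level = next((int(t) for t in tokens if t.isdigit()), None)
    for i in range(1, len(tokens) + 1):
        prefix = " ".join(tokens[:i])
        if prefix in KNOWN_ASSET_NAMES:
            return prefix, level
    return None, level

print(parse_asset_name("Dire Wolf 7"))  # ('Dire Wolf', 7)
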
Example #46
0
def train():
  data_out_dir = '/media/tk/EE44DA8044DA4B4B/cataract_phase_img'
  height = 224
  width = 224
  skip_rate = 10
  batch = 32
  nb_classes = 11
  nb_epoch = 100
  current_batch_count = 0  # start at 0 so the first batch fills X[0]/Y[0] as well

  out_dir_name = 'ResNet50Pretrain_phase'                             ## CHECK THIS!!!!!!!!!
  activation = "relu"                                                           ## CHECK THIS!!!!!!!!!
  momentum = 0.9
  lr = 0.01
  optimizer = SGD(lr=lr, momentum=momentum, decay=0.0, nesterov=True)           ## CHECK THIS!!!!!!!!!
  loss = 'categorical_crossentropy'
  model = Models.resnet(nb_classes)
  model.compile(optimizer,
                loss=loss, 
                metrics=['accuracy'])

  X = np.zeros((batch,height,width,3))
  Y = np.zeros((batch,nb_classes))

  for e in range(0,nb_epoch):
    ACC = 0.
    LOSS = 0.
    N = 0
    for vid_num in sample_lengths.keys():
      lmdb_env_x = lmdb.open(os.path.join(data_out_dir,vid_num+"X"))
      lmdb_txn_x = lmdb_env_x.begin()
      lmdb_cursor_x = lmdb_txn_x.cursor()

      lmdb_env_y = lmdb.open(os.path.join(data_out_dir,vid_num+"y"))
      lmdb_txn_y = lmdb_env_y.begin()
      lmdb_cursor_y = lmdb_txn_y.cursor()

      
      indices = list(range(0,int(sample_lengths[vid_num]/skip_rate)))
      np.random.shuffle(indices)

      label = np.frombuffer(lmdb_cursor_y.get('{:0>8d}'.format(0).encode()),dtype=np.dtype(np.int64))
      for index in indices:
        real_frame_ind = index*skip_rate
        try:
          value = np.frombuffer(lmdb_cursor_x.get('{:0>8d}'.format(index).encode()),dtype=np.dtype(np.uint8))
        except Exception:
          # skip frames that are missing from (or malformed in) the LMDB store
          continue

        x = value.reshape((height,width,3))
        x.setflags(write=1)
        x = x.astype(float)  # np.float alias was removed from recent NumPy; plain float keeps float64 behavior
        x -= 128
        x /= 128.0
        y = label[real_frame_ind]

        X[current_batch_count] = x
        Y[current_batch_count,y] = 1
        current_batch_count += 1

        if (current_batch_count % batch) == 0:
          losses = model.train_on_batch(X, Y)
          ACC += losses[1]  # running accuracy for the current epoch
          LOSS += losses[0]
          N += 1

          print("epoch: {:03d} | loss: {:.03f} | acc: {:.03f} \r".format(e,LOSS/N,ACC/N), end='\r')
          
          X = np.zeros((batch,height,width,3))
          Y = np.zeros((batch,nb_classes))
          current_batch_count = 0
    print("Finished with epoch:", e,"\n")
    model_file = './weights/' + out_dir_name + '_ep:%03d_acc:%0.3f_loss:%0.3f.h5' % (e+1, (ACC/N), (LOSS/N))
    model.save_weights(model_file, overwrite=True)
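
# A quick look at the input scaling used above: raw uint8 pixels are mapped
# from [0, 255] to roughly [-1, 1] via (x - 128) / 128.
import numpy as np

pixels = np.array([0, 128, 255], dtype=np.uint8).astype(float)
scaled = (pixels - 128) / 128.0
print(scaled)  # approximately [-1, 0, 0.992]
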
def experiment_poly_svm(train, test, featurizer):
    data = Features.make_experiment_matrices(train, test, featurizer)
    return Models.report_SVM_polyK(data['train_X'], data['train_Y'], data['test_X'], data['test_Y'])
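
# Models.report_SVM_polyK is defined elsewhere; a minimal stand-in built on
# scikit-learn's polynomial-kernel SVC might look like this (the degree and the
# returned accuracy metric are assumptions, not the original implementation).
from sklearn.svm import SVC

def report_svm_poly_sketch(train_X, train_Y, test_X, test_Y, degree=3):
    clf = SVC(kernel='poly', degree=degree)
    clf.fit(train_X, train_Y)
    return clf.score(test_X, test_Y)  # mean accuracy on the held-out split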