Example #1
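    # Test: clean and stem a CSV corpus loaded via LoadData, then write bag-of-words counts and TF-IDF terms to files.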
    def test_corpus(self):

        with open("../data/pt_BR/nnp") as f:
            nnp = [line.rstrip() for line in f.readlines()]
        with open("../data/pt_BR/terms") as f:
            terms = [line.rstrip() for line in f.readlines()]
        with open("../data/pt_BR/patterns") as f:
            patterns = [line.rstrip() for line in f.readlines()]

        data = LoadData(['../corpus/sel1.csv', '../corpus/sel2.csv']).load()
        p = PreProcessing(nnp, terms, patterns)

        tokens = []
        for d in data.values():
            tokens += p.clean_and_stem(d)

        bow, bow_features_names = p.build_bow(tokens)
        dist = np.sum(bow.toarray(), axis=0)
        tbow = {}
        for term, count in zip(bow_features_names, dist):
            tbow[term] = count

        import operator
        with open("bow", "w") as f:
            f.write(str(len(tbow)))
            f.write(
                str(
                    sorted(tbow.items(),
                           key=operator.itemgetter(1),
                           reverse=True)))

        terms = p.compute_tfidf(data.values(), eliminate_zeros=True)
        with open("terms", "w") as f:
            f.write(str(terms))
Example #2
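# Evaluate every trained network in a directory on the shared test set described by its network.cfg.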
def testall(directory, pred_file=None, label_file=None, out_path=None):
    folders = os.listdir(directory)
    networks = []
    for folder in folders:
        if os.path.isfile(directory+folder+"/network.cfg") and os.path.exists(directory+folder+"/results"):
            networks.append(folder)
    
    config_file = directory+networks[0]+"/network.cfg"
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    
    test_data = LoadData(directory = config.get('Testing Data', 'folders').split(','), 
                         data_file_name = config.get('Testing Data', 'data_file'),
                         label_file_name = config.get('Testing Data', 'label_file'),
                         seg_file_name = config.get('Testing Data', 'seg_file'))
    
    res = Analyzer(raw = test_data.get_data()[0], 
                   target = test_data.get_labels()[0])
                   
    for net in networks:
        config_file = directory+net+"/network.cfg"
        config = ConfigParser.ConfigParser()
        config.read(config_file)
        
        res.add_results(results_folder = config.get('General','directory'),
                        name = net,
                        prediction_file = config.get('Testing', 'prediction_file')+'_0', 
                        learning_curve_file = 'learning_curve')
                           
        res.analyze(-1, pred_file=pred_file, label_file=label_file, out_path=out_path)
        
    return res
Example #3
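# View function: read the CSV and JSON endpoints through LoadData and return both processed summaries.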
def index():
   endpoints_reader = LoadData()
   data = Processor()
   df_csv, df_json = endpoints_reader.read_data()
   summary_csv_info, summary_json_info = data.process_endpoints(df_csv, df_json)
   return 'Service A result: ' + summary_csv_info + \
           '\n' + 'Service B result: ' +  summary_json_info
Example #4
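 # Populate the instance through LoadData, capped by max_outgoing and max_airports, and optionally print a summary.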
 def init_with_data(self,
                    is_load=True,
                    max_outgoing=10,
                    max_airports=500,
                    silent=False):
     ld = LoadData(self, is_load, max_outgoing, max_airports, silent)
     ld.load()
     silent or self.print_info()
Example #5
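# Train (unless ONLY_LOAD_MODEL) and evaluate an RNN forecasting model on data prepared by LoadData.make_dataset.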
def start_rnn():
    load_data = LoadData()
    process_data = ProcessDataRNN(sales=load_data.make_dataset(
        categorize=CATEGORIZE, start=START, upper=UPPER))
    # xt, yt, xv, yv, xe, ye = process_data.run()
    xv, yv, xe, ye = process_data.run()

    if not ONLY_LOAD_MODEL:
        ckpt = ModelCheckpoint(OUTPUT_MODEL,
                               monitor='val_loss',
                               verbose=1,
                               save_best_only=True,
                               mode='min')
        reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                      factor=0.2,
                                      patience=5,
                                      min_lr=MIN_LR,
                                      verbose=1)
        es = EarlyStopping(monitor='val_loss', patience=3)
        # net = make_model(len(process_data.FEATS))
        net = make_model_rnn(process_data.Z.shape[2])
        print(net.summary())

        n_slices = LEN // 28
        brks = np.array(
            [LEN - (n_slices - i) * 28 for i in range(n_slices + 1)])
        brks = brks[brks >= max(LAGS) + 28]
        print("#" * 30)
        print(LEN, process_data.C.shape, process_data.Z.shape)
        print(brks)
        print(process_data.C.min(), process_data.ys.min(),
              process_data.Z[:, 66:].min())
        print("#" * 30)
        net.fit_generator(DataGenerator(
            (process_data.C, process_data.Z, process_data.ys),
            brks[:-1],
            batch_size=BATCH_SIZE),
                          epochs=EPOCH,
                          validation_data=(xv, yv),
                          callbacks=[ckpt, reduce_lr, es])

        # net.fit(xt, yt, batch_size=BATCH_SIZE, epochs=EPOCH, validation_data=(xv, yv), callbacks=[ckpt, reduce_lr, es])

    # nett = make_model(len(process_data.FEATS))
    nett = make_model_rnn(process_data.Z.shape[2])
    nett.load_weights(OUTPUT_MODEL)

    pv = nett.predict(xv, batch_size=BATCH_SIZE, verbose=1)
    pe = nett.predict(xe, batch_size=BATCH_SIZE, verbose=1)
    print("Eva result:", nett.evaluate(xv, yv, batch_size=BATCH_SIZE))

    # pv = pv.reshape((-1, 28, 9))
    # pe = pe.reshape((-1, 28, 9))
    sv = process_data.sv.reshape((-1, 28))
    se = process_data.se.reshape((-1, 28))
    # Yv = yv.reshape((-1, 28))
    return process_data, yv, pv, pe, sv, se
Example #6
 def test_load_data_full_csv(self):
    endpoints_reader = LoadData()
    json_link = '[{"date":"22-01-2021", "impressions":1376}, \
                  {"date":"21-01-2021","impressions":1906}, \
                  {"date":"20-01-2021","impressions":2818}, \
                  {"date":"19-01-2021","impressions":1024}]'
    csv_link = 'mock_unittest.csv'
    csv_file, _ = endpoints_reader.read_data(csv_link, json_link)
    self.assertEqual(csv_file.shape[0], 2)
Example #7
def main():
    # ------------------ config ------------------ #
    n_classes = 130
    batch_size = 10
    epochs = 25
    save_model_path = './'

    # -------------------------------------------- #

    data_transforms = {
        'train':
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'valid':
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }

    dataset = LoadData(data_dir='./dataset/JsonData/',
                       mode='train',
                       transforms=data_transforms['train'])

    train_loaders = torch.utils.data.DataLoader(dataset,
                                                batch_size=batch_size,
                                                shuffle=True)

    dataset = LoadData(data_dir='./dataset/JsonData/',
                       mode='valid',
                       transforms=data_transforms['valid'])

    valid_loaders = torch.utils.data.DataLoader(dataset,
                                                batch_size=batch_size,
                                                shuffle=True)

    dataloaders = {'train': train_loaders, 'valid': valid_loaders}

    print('GPUs Available:', torch.cuda.is_available())
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("training on ", device)

    net = Net(n_classes=n_classes)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    loss = torch.nn.CrossEntropyLoss()

    train(net, criterion, optimizer, loss, device, dataloaders, epochs,
          save_model_path)
Example #8
def main():
    # ------------------ config ------------------ #
    n_classes = 130
    batch_size = 2
    epochs = 30
    save_model_path = './'
    img_size = 448
    # -------------------------------------------- #

    data_transforms = {
        'train':
        transforms.Compose([
            transforms.RandomResizedCrop(img_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'valid':
        transforms.Compose([
            transforms.Resize(img_size + 50),
            transforms.CenterCrop(img_size),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }

    dataset = LoadData(data_dir='../dataset/JsonData/',
                       mode='train',
                       transforms=data_transforms['train'])

    train_loaders = torch.utils.data.DataLoader(dataset,
                                                batch_size=batch_size,
                                                shuffle=True)

    dataset = LoadData(data_dir='../dataset/JsonData/',
                       mode='valid',
                       transforms=data_transforms['valid'])

    valid_loaders = torch.utils.data.DataLoader(dataset,
                                                batch_size=batch_size,
                                                shuffle=True)

    dataloaders = {'train': train_loaders, 'valid': valid_loaders}

    print('GPUs Available:', torch.cuda.is_available())
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("training on ", device)

    net = Net(n_classes=n_classes, pretrained=False).to(device)
    optimizer = optim.SGD(net.parameters(), lr=0.08, momentum=0.9)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.1)
    loss = torch.nn.CrossEntropyLoss(reduction='sum')  # size_average=False is the deprecated spelling of reduction='sum'

    train_model(net, optimizer, loss, scheduler, device, dataloaders, epochs,
                save_model_path)
Example #9
 def test_processor(self):
   endpoints_reader = LoadData()
   data = Processor()
   json_link = '[{"date":"22-01-2021", "impressions":1376}, \
                  {"date":"21-01-2021","impressions":1906}, \
                  {"date":"20-01-2021","impressions":2818}, \
                  {"date":"19-01-2021","impressions":1024}]'
   csv_link = 'mock_unittest.csv'
   df_csv, df_json = endpoints_reader.read_data(csv_link, json_link)
   summary_csv_info, summary_json_info = data.process_endpoints(df_csv, df_json)
   self.assertEqual(str(summary_csv_info), '{"mean": 1732.0, "sum": 3464}')
   self.assertEqual(str(summary_json_info), '{"mean": 1781.0, "sum": 7124}')
Example #10
    def test_load(self):
        """test load_data.py.

        Args:
            no
        """

        load = LoadData('test_load_data')
        load.main()
        active = redis.StrictRedis(host="192.168.0.2", port=6379, db=6, password='******')

        # check up
        self.assertEqual(len(active.keys("*:*:2018-12-09 *")),
                         4, "total number check failed")
        self.assertEqual(len(active.keys("visit:*:2018-12-09 *")),
                         1, "visit incre data check failed")
        self.assertEqual(len(active.keys("search:*:2018-12-09 *")),
                         3, "search incre data check failed")
        self.assertEqual(active.zcard("visit:2018-12-09 00"),
                         1, "visit set num check failed")
        self.assertEqual(active.zcard("search:2018-12-09 01"),
                         3, "search set num check failed")
        self.assertEqual(active.get("stock:600796:jianpin"), "qjsh",
                         "jianpin check failed")
        self.assertEqual(int(active.get("visit:000001:2018-12-09 00")),
                         33, " visit value check failed")
        index_visit_set = active.zrange("visit:2018-12-09 00", 0, -1).index("000001")
        self.assertEqual(active.zrange("visit:2018-12-09 00", index_visit_set,
                                       index_visit_set, withscores=True)[0][1], 33.0,
                         "visit set value check failed")
        self.assertEqual(int(active.get("search:000007:2018-12-09 01")),
                         13, "search value check failed")
        self.assertEqual(int(active.get("search:000721:2018-12-09 01")),
                         13, "search value check failed")
        self.assertEqual(int(active.get("search:600796:2018-12-09 01")),
                         13, "search value check failed")
        search_set_keys = active.zrange("search:2018-12-09 01", 0, -1)
        index_search_set_1 = search_set_keys.index("000007")
        index_search_set_2 = search_set_keys.index("000721")
        index_search_set_3 = search_set_keys.index("600796")
        self.assertEqual(active.zrange("search:2018-12-09 01", index_search_set_1,
                                       index_search_set_1, withscores=True)[0][1],
                         13.0, "search set value check failed")
        self.assertEqual(active.zrange("search:2018-12-09 01", index_search_set_2,
                                       index_search_set_2, withscores=True)[0][1],
                         13.0, "search set value check failed")
        self.assertEqual(active.zrange("search:2018-12-09 01", index_search_set_3,
                                       index_search_set_3, withscores=True)[0][1],
                         13.0, "search set value check failed")
Example #11
 def input_drug_gene_condense():
     dir_opt = '/datainfo'
     deletion_list = []
     final_dl_input_df = pd.read_table(
         './datainfo/mid_data/final_GDSC2_dl_input.txt', delimiter=',')
     drug_map_dict, drug_dict, gene_target_num_dict = LoadData(
         dir_opt).pre_load_dict()
     target_index_list = gene_target_num_dict.values()
     drug_target_matrix = np.load(
         './datainfo/filtered_data/drug_target_matrix.npy')
     for row in final_dl_input_df.itertuples():
         drug_a = drug_map_dict[row[2]]
         cellline_name = row[1]
         # DRUG_A AND 929 TARGET GENES
         drug_a_target_list = []
         drug_index = drug_dict[drug_a]
         for target_index in target_index_list:
             if target_index == -1:
                 effect = 0
             else:
                 effect = drug_target_matrix[drug_index, target_index]
             drug_a_target_list.append(effect)
         if all([a == 0 for a in drug_a_target_list]):
             deletion_list.append(row[0])
     print('=====================' + str(len(deletion_list)))
     zero_final_dl_input_df = final_dl_input_df.drop(
         final_dl_input_df.index[deletion_list]).reset_index(drop=True)
     zero_final_dl_input_df.to_csv(
         '.' + dir_opt + '/filtered_data/zerofinal_GDSC2_dl_input.txt',
         index=False,
         header=True)
     print(zero_final_dl_input_df)
Example #12
def create_datasets(load_d: LoadData):
    """
    loads already created important files
    :param load_d: query processor in case of a missing file
    :return: loaded objects
    """
    try:
        wf_matching = pd.read_csv('matching.csv')
    except FileNotFoundError:
        create_matching('admissions.csv', "RECORDS-waveforms.csv")
        wf_matching = pd.read_csv('matching.csv')
    try:
        numeric_matching = pd.read_csv('matching_numerics.csv')
    except FileNotFoundError:
        create_matching('admissions.csv', "RECORDS-numerics.csv", numerics=True)
        numeric_matching = pd.read_csv('matching.csv')
    try:
        mortality = pd.read_csv("in_hospital_mortality.csv")
    except FileNotFoundError:
        mortality = load_d.query_db(queries.in_hospital_mortality())
        mortality.to_csv("in_hospital_mortality.csv")
    try:
        clinical_features = pd.read_csv("features_clinical.csv")
    except FileNotFoundError as e:
        print(str(e))
        print("please create clinical features file for all patients")
    return wf_matching, numeric_matching, mortality, clinical_features
Example #13
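# Entry point: build a BatchGenerator from LoadData and train, sample from, or test an LSTM captioning model.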
def main(mode):
    print('Loading data...')
    data = LoadData(dataset_path='dataset', images_path='dataset/images/')

    print('Creating Batch Generator...')
    batch_creator = BatchGenerator(data_dict=data.data_dict,
                                   captions_int=data.captions_int,
                                   image_addr=data.image_addr,
                                   **batch_params)

    if mode == 'train':
        print('Creating Models...')
        caption_model = CaptionLSTM(model_params=model_params,
                                    int2word=data.int2word)

        print('Starting training...')
        class_weights = calc_class_weights(data.captions_int.values)
        train(caption_model, batch_creator, class_weights, **train_params)

    elif mode == 'sample':
        print('Loading model...')
        model_file = open('vgg_lstm.pkl', 'rb')
        model = pickle.load(model_file)
        print('Creating sample..')
        sample(model, batch_creator, top_k=10, seq_len=16, show_image=True)

    elif mode == 'test':
        print('Loading model')
        model_file = open('vgg_lstm.pkl', 'rb')
        model = pickle.load(model_file)
        print('Testing model...')
        test(model, batch_creator, top_k=10, seq_len=16)
Example #14
def continue_run_rand_nn(matrixA, matrixB, num_gene, num_pathway, layer1,
                         layer2, layer3, path, dir_opt, RNA_seq_filename,
                         input_num, epoch, batch_size, verbose):
    # RECONSTRUCT THE MODEL TO BE TRAINED
    model = RandNN().keras_rand_nn(matrixA, matrixB, num_gene, num_pathway,
                                   layer1, layer2, layer3)
    with open(path + '/layer_bias_list.txt', 'rb') as filebias:
        layer_bias_list = pickle.load(filebias)
    with open(path + '/layer_weight_list.txt', 'rb') as fileweight:
        layer_weight_list = pickle.load(fileweight)
    model.compile(loss='mean_squared_error',
                  optimizer='adam',
                  metrics=['mse', 'accuracy'])
    xTmp, yTmp = LoadData(dir_opt, RNA_seq_filename).load_train(0, 1)
    model.fit(xTmp, yTmp, epochs=1, validation_split=1, verbose=0)
    model_layer_list = []
    num_layer = len(model.layers)
    for i in range(num_layer):
        each_layer_list = [layer_weight_list[i], layer_bias_list[i]]
        model_layer_list.append(each_layer_list)
        model.layers[i].set_weights(each_layer_list)
    # AUTO UPDATE WEIGHT
    model, history, num_layer, path = RunRandNN(
        model, dir_opt, RNA_seq_filename).train(input_num, epoch, batch_size,
                                                verbose)
    return model, history, path
Example #15
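    # Build per-gene lists of targeting drugs and KEGG pathways using dictionaries preloaded through LoadData.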
    def drug_gene_pathway_reform(self):
        dir_opt = self.dir_opt
        rna_df = pd.read_csv('./datainfo/filtered_data/tailed_rnaseq_fpkm_20191101.csv')
        gene_list = list(rna_df['symbol'])
        drug_map_dict, drug_dict, gene_target_num_dict = LoadData(dir_opt).pre_load_dict()
        # 929 MAPPED GENES INDEX IN [drug_target_matrix]
        target_index_list = gene_target_num_dict.values()
        # 24 DRUGS IN DEEP LEARNING TASK
        zero_final_dl_input_df = pd.read_table('./datainfo/filtered_data/zerofinal_GDSC2_dl_input.txt', delimiter = ',')
        zero_final_drug_list = []
        for drug in zero_final_dl_input_df['DRUG_NAME']:
            if drug not in zero_final_drug_list:
                zero_final_drug_list.append(drug)
        zero_final_drug_list = sorted(zero_final_drug_list)
        # 24 MAPPED DRUGS IN DEEP LEARNING TASK
        mapped_drug_list = []
        for zero_drug in zero_final_drug_list:
            mapped_drug_list.append(drug_map_dict[zero_drug])
        drug_target_matrix = np.load('.' + dir_opt + '/filtered_data/drug_target_matrix.npy')
        # FIND DRUGS CAN TARGET ON GENES
        multi_drug_list = []
        for target_index in target_index_list:
            temp_drug_list = []
            if target_index == -1:
                temp_drug_list = ['NaN']
                multi_drug_list.append(temp_drug_list)
            else:
                for mapped_drug in mapped_drug_list:
                    drug_index = drug_dict[mapped_drug]
                    effect = drug_target_matrix[drug_index, target_index]
                    if effect == 1: temp_drug_list.append(mapped_drug)
                if len(temp_drug_list) == 0: temp_drug_list = ['NaN']
                multi_drug_list.append(temp_drug_list)
        # CONVERT EACH GENES TARGETED DRUGS TO DATAFRAME
        drug_gene = {'Drugs': multi_drug_list, 'Genes': gene_list}
        drug_gene_df = pd.DataFrame(drug_gene, columns=['Drugs','Genes'])
        drug_gene_df.to_csv('./datainfo/filtered_data/drug_gene.csv', index = False, header = True)

        # ADD PATHWAYS TO CORRESPONDING GENES
        gene_pathway_df = pd.read_csv('./datainfo/filtered_data/Tailed_Selected_Kegg_Pathways2.csv')
        pathway_name_list = list(gene_pathway_df.columns)[1:]
        multi_pathway_list = []
        # import pdb; pdb.set_trace()
        for row in gene_pathway_df.itertuples():
            temp_pathway_list = []
            for index in np.arange(2, 48):
                if row[index] == 1: 
                    temp_pathway_list.append(pathway_name_list[index - 2])
            if len(temp_pathway_list) == 0:
                temp_pathway_list = ['NaN']
                print(row[1])
            multi_pathway_list.append(temp_pathway_list)
        # print(multi_pathway_list)
        # print(len(multi_pathway_list))
        # CONVERT EACH GENES TARGETED DRUGS/ CONNECTION TO PATHWAYS TO DATAFRAME
        drug_gene_pathway = {'Drugs': multi_drug_list, 'Genes': gene_list, 'Pathways': multi_pathway_list}
        drug_gene_pathway_df = pd.DataFrame(drug_gene_pathway, columns=['Drugs', 'Genes', 'Pathways'])
        drug_gene_pathway_df.to_csv('./datainfo/filtered_data/drug_gene_pathway.csv', index = False, header = True)
        print(drug_gene_pathway_df)
Example #16
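# GUI handler: load chemical, region, and release inputs via LoadData (or load_data for nanomaterials) and show them in disabled entry widgets.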
def verify_input():
    # get the data from input folder
    val1 = v1.get()
    chem_type = get_chemType(val1)
    chem_file = get_chemFile(chem_type)
    region_file = CUR_PATH + './Input/Region.xlsx'
    release_file = CUR_PATH + './Input/ChemRelease.xlsx'
    start_date_temp = '2005 1 1'
    end_date_temp = '2005 12 31'

    if chem_type != 'Nanomaterial':
        load_data_nonNano = LoadData(chem_type, chem_file, region_file,
                                     release_file, start_date_temp,
                                     end_date_temp)
        chem_params, presence, env, climate, bgConc, release, release_scenario = load_data_nonNano.run_loadData(
        )
    else:
        time, presence, env, climate, bgConc, chem_params, release, release_scenario = load_data(
            region_file, release_file, chem_file, start_date_temp,
            end_date_temp)

    chem_txt4 = Label(text='Chemical:', bg=bg_col).place(x=140, y=170)
    chem_name = StringVar(window, value=chem_params['name'])
    entry1 = Entry(window,
                   textvariable=chem_name,
                   bg='lightgrey',
                   state='disabled')
    entry1.place(x=220, y=170, width=250)

    chem_txt5 = Label(text='Region:', bg=bg_col).place(x=140, y=200)
    region_name = StringVar(window, value=env['name'])
    entry2 = Entry(window,
                   textvariable=region_name,
                   bg='lightgrey',
                   state='disabled')
    entry2.place(x=220, y=200, width=250)

    chem_txt6 = Label(text='Release:', bg=bg_col).place(x=140, y=230)
    release_scenario = StringVar(window, value=release_scenario)
    entry3 = Entry(window,
                   textvariable=release_scenario,
                   bg='lightgrey',
                   state='disabled')
    entry3.place(x=220, y=230, width=250)

    return chem_params, env, release_scenario
Example #17
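# Load a single network's test data from its config file and analyze its stored predictions.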
def testprediction(config_file, pred_file=None, label_file=None, out_path=None):     
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    
    test_data = LoadData(directory = config.get('Testing Data', 'folders').split(','), 
                         data_file_name = config.get('Testing Data', 'data_file'),
                         label_file_name = config.get('Testing Data', 'label_file'), 
                         seg_file_name = config.get('Testing Data', 'seg_file'))
    
    res = Analyzer(raw = test_data.get_data()[0],
                   target = test_data.get_labels()[0])
    res.add_results(results_folder = config.get('General','directory'),
                    name = config_file.split('/')[-3],
                    prediction_file = config.get('Testing', 'prediction_file')+'_0', 
                    learning_curve_file = 'learning_curve')           
    res.analyze(-1, pred_file=pred_file, label_file=label_file, out_path=out_path)
    
    return res
Example #18
def evaluate_accuracy():

    torch.backends.cudnn.deterministic = True
    device = torch.device("cuda")

    print("CUDA visible devices: " + str(torch.cuda.device_count()))
    print("CUDA Device Name: " + str(torch.cuda.get_device_name(device)))

    # Create test dataset loader
    test_dataset = LoadData(dataset_dir, TEST_SIZE, 2.0, test=True)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=1,
                             shuffle=False,
                             num_workers=1,
                             pin_memory=True,
                             drop_last=False)

    # Define the model architecture and restore it from the .pth file, e.g.:

    model = PyNET(level=0, instance_norm=True,
                  instance_norm_level_1=True).to(device)
    model = torch.nn.DataParallel(model)
    model.load_state_dict(torch.load("models/original/pynet_level_0.pth"),
                          strict=True)

    # Define the losses

    MSE_loss = torch.nn.MSELoss()
    MS_SSIM = MSSSIM()

    loss_psnr = 0.0
    loss_msssim = 0.0

    model.eval()
    with torch.no_grad():

        test_iter = iter(test_loader)
        for j in range(len(test_loader)):

            x, y = next(test_iter)
            x = x.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)

            # Process raw images with your model:
            enhanced = model(x)

            # Compute losses
            loss_mse_temp = MSE_loss(enhanced, y).item()
            loss_psnr += 20 * math.log10(1.0 / math.sqrt(loss_mse_temp))

            loss_msssim += MS_SSIM(y, enhanced).detach().cpu().numpy()

    loss_psnr = loss_psnr / TEST_SIZE
    loss_msssim = loss_msssim / TEST_SIZE

    output_logs = "PSNR: %.4g, MS-SSIM: %.4g\n" % (loss_psnr, loss_msssim)
    print(output_logs)
Example #19
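# CNN counterpart of the forecasting pipeline: train on LoadData output, restore the best checkpoint, and return reshaped predictions.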
def start_cnn():
    load_data = LoadData()
    process_data = ProcessDataCNN(sales=load_data.make_dataset(
        categorize=CATEGORIZE, start=START, upper=UPPER))
    xt, yt, xv, yv, xe, ye = process_data.run()

    if not ONLY_LOAD_MODEL:
        ckpt = ModelCheckpoint(OUTPUT_MODEL,
                               monitor='val_loss',
                               verbose=1,
                               save_best_only=True,
                               mode='min')
        reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                      factor=0.2,
                                      patience=5,
                                      min_lr=MIN_LR,
                                      verbose=1)
        es = EarlyStopping(monitor='val_loss', patience=3)
        net = make_model_cnn(len(process_data.FEATS))
        plot_model(net, to_file='model.png')
        print(net.summary())
        # exit()

        net.fit(xt,
                yt,
                batch_size=BATCH_SIZE,
                epochs=EPOCH,
                validation_data=(xv, yv),
                callbacks=[reduce_lr, ckpt, es])

    nett = make_model_cnn(len(process_data.feats_list))
    nett.load_weights(OUTPUT_MODEL)

    pv = nett.predict(xv, batch_size=BATCH_SIZE, verbose=1)
    pe = nett.predict(xe, batch_size=BATCH_SIZE, verbose=1)
    print("Eva result:", nett.evaluate(xv, yv, batch_size=BATCH_SIZE))

    pv = pv.reshape((-1, 28, 9))
    pe = pe.reshape((-1, 28, 9))
    sv = process_data.sv.reshape((-1, 28))
    se = process_data.se.reshape((-1, 28))
    Yv = yv.reshape((-1, 28))

    return process_data, Yv, pv, pe, sv, se
Example #20
    def test_load_data_expect_summean_when_links_are_valid(self):

       endpoints_reader = LoadData()
       data = Processor()
       json_link = '[{"date":"22-01-2021","impressions": 1376}, \
       {"date":"21-01-2021","impressions": 1906},{"date":"20-01-2021","impressions": 2818},\
       {"date":"19-01-2021","impressions": 1024}, \
       {"date":"18-01-2021","impressions": 646},{"date":"17-01-2021","impressions": 2885}, \
       {"date":"16-01-2021","impressions": 1889},{"date":"15-01-2021","impressions": 1534}, \
       {"date":"14-01-2021","impressions": 995},{"date":"13-01-2021","impressions": 1251}, \
       {"date":"12-01-2021","impressions": 2062},{"date":"11-01-2021","impressions": 1204}, \
       {"date":"10-01-2021","impressions": 2030},{"date":"09-01-2021","impressions": 1166}, \
       {"date":"08-01-2021","impressions": 2025},{"date":"07-01-2021","impressions": 1221}, \
       {"date":"06-01-2021","impressions": 2018},{"date":"05-01-2021","impressions": 2484}, \
       {"date":"04-01-2021","impressions": 1145},{"date":"03-01-2021","impressions": 2686}, \
       {"date":"02-01-2021","impressions": 2186},{"date":"01-01-2021","impressions": 1527}, \
       {"date":"31-12-2020","impressions": 1710},{"date":"30-12-2020","impressions": 1343}, \
       {"date":"29-12-2020","impressions": 2466},{"date":"28-12-2020","impressions": 952}, \
       {"date":"27-12-2020","impressions": 532},{"date":"26-12-2020","impressions": 2690}, \
       {"date":"25-12-2020","impressions": 2428},{"date":"24-12-2020","impressions": 602}, \
       {"date":"23-12-2020","impressions": 995},{"date":"22-12-2020","impressions": 615}, \
       {"date":"21-12-2020","impressions": 2055},{"date":"20-12-2020","impressions": 1337}, \
       {"date":"19-12-2020","impressions": 1824},{"date":"18-12-2020","impressions": 1645}, \
       {"date":"17-12-2020","impressions": 2655},{"date":"16-12-2020","impressions": 2619}, \
       {"date":"15-12-2020","impressions": 1189},{"date":"14-12-2020","impressions": 2391}, \
       {"date":"13-12-2020","impressions": 1612},{"date":"12-12-2020","impressions": 510}, \
       {"date":"11-12-2020","impressions": 2655},{"date":"10-12-2020","impressions": 2029}, \
       {"date":"09-12-2020","impressions": 2899},{"date":"08-12-2020","impressions": 1170}, \
       {"date":"07-12-2020","impressions": 526},{"date":"06-12-2020","impressions": 2092}, \
       {"date":"05-12-2020","impressions": 1453},{"date":"04-12-2020","impressions": 738}, \
       {"date":"03-12-2020","impressions": 700},{"date":"02-12-2020","impressions": 1485}, \
       {"date":"01-12-2020","impressions": 1571},{"date":"30-11-2020","impressions": 2106}, \
       {"date":"29-11-2020","impressions": 2646},{"date":"28-11-2020","impressions": 1092}, \
       {"date":"27-11-2020","impressions": 1495},{"date":"26-11-2020","impressions": 2356}, \
       {"date":"25-11-2020","impressions": 1474},{"date":"24-11-2020","impressions": 1431}, \
       {"date":"23-11-2020","impressions": 1359},{"date":"22-11-2020","impressions": 1420}]'
       csv_link = 'ba026992-281a-42a6-8447-ae1c8a04106e.csv'
       csv_file, json_file = endpoints_reader.read_data(csv_link, json_link)
       summary_csv_info, summary_json_info = data.process_endpoints(csv_file, json_file)
       print(summary_csv_info, summary_json_info)
       self.assertEqual(str(summary_csv_info), '{"mean": 1781.85, "sum": 110475}')
       self.assertEqual(str(summary_json_info), '{"mean": 1660.4, "sum": 102945}')
Example #21
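 # Count distinct drugs in the filtered deep-learning input and how many drug_map_dict entries are mapped (str) versus unmapped (NaN).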
 def zero_final_drug_count():
     zero_final_dl_input_df = pd.read_table('./datainfo/filtered_data/zerofinal_GDSC2_dl_input.txt', delimiter = ',')
     zero_final_drug_list = []
     for drug in zero_final_dl_input_df['DRUG_NAME']:
         if drug not in zero_final_drug_list:
             zero_final_drug_list.append(drug)
     zero_final_drug_list = sorted(zero_final_drug_list)
     print(zero_final_drug_list)
     print(len(zero_final_drug_list))
     # Count the Number of Drugs Intersection Between [dl_input, drugBank]
     dir_opt = '/datainfo'
     drug_map_dict, drug_dict, gene_target_num_dict = LoadData(dir_opt).pre_load_dict()
     count = 0
     anti_count = 0
     for key, value in drug_map_dict.items():
         if type(value) is str:
             count = count + 1
         elif math.isnan(value) == True:
             anti_count = anti_count + 1
     print(count)
     print(anti_count)
Example #22
def cardiac_patients(query_proccesor: LoadData, admissions: str, gender: str, path: str, cardiac=True):
    """
    Extract patients using a filtering scheme
    :param query_proccesor: sql handler
    :param admissions: relevant admissions
    :param gender: desired gender for the patients
    :param path: path to already created file
    :param cardiac: cardiac or non-cardiac patients
    :return: patient dataframe with demographics
    """
    try:
        patients = pd.read_csv(path)
    except FileNotFoundError:
        patients = query_proccesor.query_db(queries.patient_selection(admissions, gender, cardiac))
        patients.to_csv(path)
    return patients
Example #23
def manual_test_rand_nn(matrixA, matrixB, num_gene, num_pathway, layer1, layer2, layer3, path, dir_opt, learning_rate):
    # MANUALLY REBUILD THE MODEL
    input_model, gene_model, pathway_model, model = build_rand_nn(matrixA, matrixB, num_gene, num_pathway, layer1, layer2, layer3)
    with open(path + '/layer_list.txt', 'rb') as filelayer:
        layer_list = pickle.load(filelayer)
    model.compile(loss='mean_squared_error',
                    optimizer=Adam(lr=learning_rate),
                    metrics=['mse', 'accuracy']) 
    xTmp, yTmp = LoadData(dir_opt).load_train(0, 1)
    model.fit(xTmp, yTmp, epochs = 1, validation_split = 1, verbose = 0)
    num_layer = len(model.layers)
    for i in range(num_layer):
        model.get_layer(index = i).set_weights(layer_list[i])
    # PREDICT MODEL USING [xTe, yTe]
    verbose = 1
    y_pred, score = RunRandNN(model, dir_opt).test(verbose, path)
Example #24
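# Train a Siamese network on matched and shuffled proteomic-RNA pairs from LoadData and return a pivoted table of match probabilities.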
def rna_proteomic_mismatch_probabilities(train=True):
    # Train siamese network
    data = LoadData()
    pro_data = data.proteomic
    rna_data = data.rna

    prot_x = pd.concat([pro_data] * 3 + [data.test_proteomic] * 3)
    shuffled_rna = rna_data.sample(frac=1)
    rna_x = pd.concat([
        rna_data, shuffled_rna,
        rna_data.sample(frac=1), data.test_rna,
        data.test_rna.sample(frac=1),
        data.test_rna.sample(frac=1)
    ])
    labels = [1.0] * 80 + [0.0] * 160 + [1.0] * 80 + [0.0] * 160

    network = SiameseNet([(pro_data.shape[-1], ), (rna_data.shape[-1], )])
    network.fit([prot_x, rna_x],
                labels,
                epochs=100,
                batch_size=5,
                verbose=False)

    # Calculate pairwise probabilities
    if not train:
        pro_data = data.test_proteomic
        rna_data = data.test_rna

    vals = {
        "Proteomic": [],
        "RNA": [],
        "Probability": [],
    }
    for i, x in pro_data.iterrows():
        for j, y in rna_data.iterrows():
            vals['Proteomic'].append(x.name)
            vals['RNA'].append(y.name)
            vals['Probability'].append(network.predict([[x], [y]])[0][0])

    probs = pd.DataFrame(vals)
    if train:
        order = data.clinical.index.tolist()
    else:
        order = data.test_clinical.index.tolist()
    probs = probs.pivot(index='RNA', columns='Proteomic',
                        values='Probability')[order].reindex(order)
    return probs
Example #25
def continue_run_rand_nn(matrixA, matrixB, num_gene, num_pathway, layer1, layer2, layer3, path,
                dir_opt, input_num, epoch, batch_size, verbose, learning_rate, end_epoch):
    # REBUILD DECOMPOSED MODEL FROM SAVED MODEL
    input_model, gene_model, pathway_model, model = build_rand_nn(matrixA, matrixB, num_gene, num_pathway, layer1, layer2, layer3)
    with open(path + '/layer_list.txt', 'rb') as filelayer:
        layer_list = pickle.load(filelayer)
    model.compile(loss='mean_squared_error',
                    optimizer=Adam(lr=learning_rate),
                    metrics=['mse', 'accuracy']) 
    xTmp, yTmp = LoadData(dir_opt).load_train(0, 1)
    model.fit(xTmp, yTmp, epochs = 1, validation_split = 1, verbose = 0)
    num_layer = len(model.layers)
    for i in range(num_layer):
        model.get_layer(index = i).set_weights(layer_list[i])
    # RUN MODEL (AUTO UPDATE WEIGHT)    
    model, history, path = run_rand_nn(model, dir_opt, matrixA, matrixB, input_num, epoch, batch_size, verbose, learning_rate, end_epoch)
    return model, history, path
Example #26
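 # Evaluate the trained model on the LoadData test split, append predicted scores to the test input table, and report the Pearson correlation.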
 def test(self, verbose, path):
     model = self.model
     dir_opt = self.dir_opt
     RNA_seq_filename = self.RNA_seq_filename
     xTe, yTe = LoadData(dir_opt, RNA_seq_filename).load_test()
     # TEST OUTPUT PRED 
     y_pred = model.predict(xTe)
     y_pred_list = [item for elem in y_pred for item in elem]
     score = model.evaluate(xTe, yTe, verbose = verbose)
     final_test_input_df = pd.read_csv('.' + dir_opt + '/filtered_data/TestInput.txt', delimiter = ',')
     final_row, final_col = final_test_input_df.shape
     final_test_input_df.insert(final_col, 'Pred Score', y_pred_list, True)
     final_test_input_df.to_csv(path + '/PredTestInput.txt', index = False, header = True)
     # ANALYSE PEARSON CORR
     test_pearson = final_test_input_df.corr(method = 'pearson')
     print(score)
     print(test_pearson)
     return y_pred, score
Example #27
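# Map each clinical sample to an integer label encoding MSI status and gender.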
def clinical_labels_dict(train=True):
    data = LoadData()

    def clinical_to_int(row):
        output = 0
        if row.msi == "MSI-High":
            output += 2
        if row.gender == "Female":
            output += 1
        return output

    if train:
        clin_data = data.clinical
    else:
        clin_data = data.test_clinical

    labels = clin_data.apply(clinical_to_int, axis="columns")
    return {sample: label for sample, label in zip(clin_data.index, labels)}
Example #28
def clinical_probabilities(train=True, learner=learner_functions.train_rf):
    """Get the probabilities of each clinical class for each sample for
    proteomic and rna data.

    Returns (proteomic_probabilities_df, rna_probabilities_df)
    """
    data = LoadData()

    def clinical_to_int(row):
        output = 0
        if row.msi == "MSI-High":
            output += 2
        if row.gender == "Female":
            output += 1
        return output

    train_labels = pd.concat([data.clinical,
                              data.test_clinical]).apply(clinical_to_int,
                                                         axis="columns")
    train_proteomic = pd.concat([data.proteomic, data.test_proteomic])
    train_rna = pd.concat([data.rna, data.test_rna])

    model = learner(train_proteomic, train_labels)
    if train:
        proteomic_probabilities_df = pd.DataFrame(model[0].predict_proba(
            data.proteomic))
        proteomic_probabilities_df['sample'] = data.clinical.index
    else:
        proteomic_probabilities_df = pd.DataFrame(model[0].predict_proba(
            data.test_proteomic))
        proteomic_probabilities_df['sample'] = data.test_clinical.index
    proteomic_probabilities_df = proteomic_probabilities_df.set_index('sample')

    model = learner(train_rna, train_labels)
    if train:
        rna_probabilities_df = pd.DataFrame(model[0].predict_proba(data.rna))
        rna_probabilities_df['sample'] = data.clinical.index
    else:
        rna_probabilities_df = pd.DataFrame(model[0].predict_proba(
            data.test_rna))
        rna_probabilities_df['sample'] = data.test_clinical.index
    rna_probabilities_df = rna_probabilities_df.set_index('sample')

    return proteomic_probabilities_df, rna_probabilities_df
Example #29
 def input_drug_gene_condense(self, RNA_seq_filename):
     dir_opt = self.dir_opt
     deletion_list = []
     final_dl_input_df = pd.read_table(
         '.' + dir_opt + '/mid_data/FinalDeepLearningInput.txt',
         delimiter=',')
     drug_map_dict, cellline_map_dict, drug_dict, gene_target_num_dict = LoadData(
         dir_opt, RNA_seq_filename).pre_load_dict()
     target_index_list = gene_target_num_dict.values()
     drug_target_matrix = np.load('.' + dir_opt +
                                  '/filtered_data/drug_target_matrix.npy')
     for row in final_dl_input_df.itertuples():
         drug_a = drug_map_dict[row[1]]
         drug_b = drug_map_dict[row[2]]
         cellline_name = cellline_map_dict[row[3]]
         # DRUG_A AND 1130 TARGET GENES
         drug_a_target_list = []
         drug_index = drug_dict[drug_a]
         for target_index in target_index_list:
             if target_index == -1:
                 effect = 0
             else:
                 effect = drug_target_matrix[drug_index, target_index]
             drug_a_target_list.append(effect)
         # DRUG_B AND 1130 TARGET GENES
         drug_b_target_list = []
         drug_index = drug_dict[drug_b]
         for target_index in target_index_list:
             if target_index == -1:
                 effect = 0
             else:
                 effect = drug_target_matrix[drug_index, target_index]
             drug_b_target_list.append(effect)
         if all([a == 0 for a in drug_a_target_list]) or all(
             [b == 0 for b in drug_b_target_list]):
             deletion_list.append(row[0])
     zero_final_dl_input_df = final_dl_input_df.drop(
         final_dl_input_df.index[deletion_list]).reset_index(drop=True)
     zero_final_dl_input_df.to_csv(
         '.' + dir_opt + '/filtered_data/ZeroFinalDeepLearningInput.txt',
         index=False,
         header=True)
     print(zero_final_dl_input_df)
Example #30
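# Entry point: feed LoadData output into a BatchGenerator and either train or test the seq2seq model.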
def main(mode):
    dataset_path = 'data'
    data = LoadData(dataset_path=dataset_path, **data_params)

    print('Creating Batch Generator...')
    batch_gen = BatchGenerator(data_dict=data.data_dict,
                               label_dict=data.label_dict,
                               **batch_params)

    if mode == 'train':
        train(vocabs=[data.word2int, data.int2word],
              batch_gen=batch_gen,
              train_params=train_params,
              model_params=model_params)

    elif mode == 'test':
        print('Loading model')
        model_file = open('results/seq2seq.pkl', 'rb')
        model = pkl.load(model_file)
        print('Testing model...')
        test(model, data.int2word, batch_gen)
Example #31
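# Build an Analyzer over one network or every network in a directory, reading the test set named in each network.cfg.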
def ViewResults(**kwargs):
    directory = kwargs.get("directory", "")
    network = kwargs.get("network", None)
    prediction_file = kwargs.get("predictions_file", None)

    if network:
        # Assume that all networks are tested on the same set of data
        config = ConfigParser.ConfigParser()
        config.read("networks/" + network + "/network.cfg")
        data = LoadData(
            directory=config.get("Testing Data", "folders").split(",")[0],
            data_file_name=config.get("Testing Data", "data_file"),
            label_file_name=config.get("Testing Data", "label_file"),
        )

        if not prediction_file:
            prediction_file = "test_prediction_0"

        results = Analyzer(target=data.get_labels()[0], raw=data.get_data()[0])
        results.add_results(results_folder="networks/" + network + "/", name=network, prediction_file=prediction_file)

    else:
        folders = os.listdir(directory)
        networks = []
        for folder in folders:
            if os.path.isfile(directory + folder + "/network.cfg"):
                networks.append(folder)

        # Assume that all networks are tested on the same set of data
        config = ConfigParser.ConfigParser()
        config.read(directory + networks[0] + "/network.cfg")
        data = LoadData(
            directory=config.get("Testing Data", "folders").split(",")[0],
            data_file_name=config.get("Testing Data", "data_file"),
            label_file_name=config.get("Testing Data", "label_file"),
        )

        if not prediction_file:
            prediction_file = "test_prediction_0"

        results = Analyzer(target=data.get_labels()[0], raw=data.get_data()[0])
        for net in networks:
            results.add_results(results_folder=directory + net + "/", name=net, prediction_file=prediction_file)

    return results
Example #32
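    # Test: clean a CSV corpus with CleanupData and write each document into a status-specific results folder.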
    def test_cleanup(self):

        with open("../data/pt_BR/nnp") as f:
            nnp = [line.rstrip() for line in f.readlines()]
        with open("../data/pt_BR/terms") as f:
            terms = [line.rstrip() for line in f.readlines()]
        with open("../data/pt_BR/patterns") as f:
            patterns = [line.rstrip() for line in f.readlines()]

        data = LoadData(['../corpus/sel1.csv', '../corpus/sel2.csv'],
                        ['D', 'C']).load()
        p = CleanupData(nnp, terms, patterns)
        new_data = p.clean(data)

        with open("results/clean.txt", "w") as f:
            f.write("\n".join("%s;%s;%s" % (d.identifier, d.text, d.status)
                              for d in new_data))

        for i, d in enumerate(new_data):
            dirname = "conectado" if d.status == "C" else "desconectado"
            filename = "results/%s/clt_%s.txt" % (dirname, i)
            with open(filename, "w") as f:
                f.write(d.text)
Example #33
def manual_test_rand_nn(matrixA, matrixB, num_gene, num_pathway, layer1,
                        layer2, layer3, path, dir_opt, RNA_seq_filename):
    # RECONSTRUCT THE TEST MODEL
    model = RandNN().keras_rand_nn(matrixA, matrixB, num_gene, num_pathway,
                                   layer1, layer2, layer3)
    with open(path + '/layer_bias_list.txt', 'rb') as filebias:
        layer_bias_list = pickle.load(filebias)
    with open(path + '/layer_weight_list.txt', 'rb') as fileweight:
        layer_weight_list = pickle.load(fileweight)
    model.compile(loss='mean_squared_error',
                  optimizer='adam',
                  metrics=['mse', 'accuracy'])
    xTmp, yTmp = LoadData(dir_opt, RNA_seq_filename).load_train(0, 1)
    model.fit(xTmp, yTmp, epochs=1, validation_split=1, verbose=0)
    model_layer_list = []
    num_layer = len(model.layers)
    for i in range(num_layer):
        each_layer_list = [layer_weight_list[i], layer_bias_list[i]]
        model_layer_list.append(each_layer_list)
        model.layers[i].set_weights(each_layer_list)
    # PREDICT MODEL USING [xTe, yTe]
    verbose = 1
    y_pred, score = RunRandNN(model, dir_opt,
                              RNA_seq_filename).test(verbose, path)
Example #34
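# ETL entry point: pull a table as JSON from the source connection and load it into MySQL through LoadData.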
def main(tablename,
         use_json=0,
         database='tnt',
         start_date=0,
         end_date=0,
         batch_size=30):
    try:
        last_version = get_last_version(tablename)
        file_path = "{}/data/{}_v{}.json".format(
            Settings.BASE_DIR, tablename,
            last_version) if use_json == 0 else use_json
        source_conn = Connection(
            'mo4jo').connect_mssql() if use_json is None else None

        target_conn = Connection(database).connect_mysql()

        start_date = start_date if start_date != None else '2012-01-01'
        batch_size = Settings.DATABASE_CONFIG['tnt']['batch_size']
        now = datetime.utcnow().date()

        mo4jo_pac_clients_mapping = json.loads(
            open(
                "{}/data/mo4jo_pac_clients_mapping.json".format(
                    Settings.BASE_DIR), "r").read())
        end_date = end_date if end_date != None else now
        # entity_json = PullData(source_conn, tablename, file_path).get_data(kwargs={'start_date':start_date,'end_date':end_date})
        entity_json = PullData(source_conn, tablename, file_path).get_data()
        if isinstance(end_date, str):  # parse end_date when it is given as a 'YYYY-MM-DD' string
            end_date = datetime.strptime(end_date, '%Y-%m-%d').date()

        LoadData(target_conn, tablename, entity_json, start_date, end_date,
                 batch_size).load_data(mo4jo_pac_clients_mapping)
        # LoadData(target_conn, tablename, entity_json).load_data(mo4jo_pac_clients_mapping)
    except Exception as e:
        import traceback
        print(traceback.format_exc())
Example #35
def bias_variable(shape):
  initial = tf.constant(0.1, shape=shape)
  return tf.Variable(initial)


def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_2x2(x):
  return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')

# Load the Digit DataSet
load_data = LoadData()
train_set_x, train_set_y = load_data.load_train_data("/home/darshan/Documents/DigitRecognizer/MNIST_data/",
                                                    "train.csv")

#mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

x = tf.placeholder(tf.float32, [None, 784])
x_image = tf.reshape(x, [-1, 28, 28, 1])
y_ = tf.placeholder(tf.float32, [None, 10])

# First Layer of Convnet
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])

# 28 x 28 -> 28 x 28 (5x5 conv with SAME padding keeps the spatial size)
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
Example #36
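# Script fragment: configure the Theano device, load training data with LoadData, then sweep filter sizes, layer counts, and filter counts.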
theano.sandbox.cuda.use(device)
if device != "cpu":
    theano.config.nvcc.flags = "-use=fast=math"
    theano.config.allow_gc = False


print "\nLoading Data..."
# ------------------------------------------------------------------------------
# Load data (even though most of it won't be used)
train_data_folder = "/nobackup/turaga/data/fibsem_medulla_7col/trvol-250-1-h5/"
data_file = "img_normalized.h5"
label_file = "groundtruth_aff.h5"
seg_file = "groundtruth_seg.h5"
data = LoadData(
    directory=train_data_folder, data_file_name=data_file, label_file_name=label_file, seg_file_name=seg_file
)


# ------------------------------------------------------------------------------
# Search
log_interval = 100
results = []
print "\nBeginning Search:"
fsize = 7
while fsize > 2:
    nlayers = 4
    while nlayers < max_layers:
        nfilters = 50
        while nfilters < max_filters:
            name = "conv-" + ` nlayers ` + "." + ` nfilters ` + "." + ` fsize `
Example #37
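# Restore a trained CNN from its config and write predictions for the configured (or supplied) test data.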
def makeprediction(config_file, data_file=None, out_path=None, out_file=None, gpu=None):
    
    #Open configuration file for this network
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    
    #Set the device on which to perform these computations
    if gpu:
        theano.sandbox.cuda.use(gpu)
        theano.config.nvcc.flags='-use=fast=math'
        theano.config.allow_gc=False
    else:
        device = config.get('General', 'device')
        theano.sandbox.cuda.use(device)
        if (device != 'cpu'):
            theano.config.nvcc.flags='-use=fast=math'
            theano.config.allow_gc=False
    #------------------------------------------------------------------------------

    starttime=time.clock()
    print '\nInitializing Network'
    if os.path.exists(config.get('General', 'directory')+config.get('Network', 'weights_folder')):
        network = CNN(weights_folder = config.get('General', 'directory')+config.get('Network', 'weights_folder'),
                      activation = config.get('Network', 'activation'))
    else:
        print 'Error: Weights folder does not exist. Could not initialize network'
        return;
    #------------------------------------------------------------------------------
    
    print 'Opening Data Files'
    if data_file:
        test_data = LoadData(directory = '', data_file_name = data_file)
    else:
        test_data = LoadData(directory = config.get('Testing Data', 'folders').split(','), 
                             data_file_name = config.get('Testing Data', 'data_file'))
    #------------------------------------------------------------------------------
    init_time = time.clock() - starttime
    print "Initialization = " + `init_time` + " seconds"       
                           
            
    starttime = time.clock()                 
    print 'Making Predictions'
    if out_path and out_file:
        network.predict(test_data.get_data(),
                        results_folder = out_path,
                        name = out_file)
    elif out_path:
        network.predict(test_data.get_data(),
                        results_folder = out_path,
                        name = config.get('Testing', 'prediction_file'))
    elif out_file:
        network.predict(test_data.get_data(),
                        results_folder = config.get('General', 'directory')+config.get('Testing', 'prediction_folder'),
                        name = out_file)
    else:
        network.predict(test_data.get_data(),
                        results_folder = config.get('General', 'directory')+config.get('Testing', 'prediction_folder'),
                        name = config.get('Testing', 'prediction_file'))
    pred_time = time.clock() - starttime
    #------------------------------------------------------------------------------
    print "Prediction Time   = " + `pred_time` + " seconds"
        
    test_data.close()
Example #38
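# Train a CNN described by a config file, resuming from saved weights when they exist, and return the training error.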
def trainnetwork(config_file):
    
    #Open configuration file for this network
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    
    #Set the device on which to perform these computations
    device = config.get('General', 'device')
    theano.sandbox.cuda.use(device)
    if (device != 'cpu'):
        theano.config.nvcc.flags='-use=fast=math'
        theano.config.allow_gc=False
    #------------------------------------------------------------------------------
        
    print '\nOpening Data Files'
    #Load the data for training
    training_data = LoadData(directory = config.get('Training Data', 'folders').split(','),
                             data_file_name = config.get('Training Data', 'data_file'),
                             label_file_name = config.get('Training Data', 'label_file'),
                             seg_file_name = config.get('Training Data', 'seg_file'))
    #------------------------------------------------------------------------------
                             
    starttime=time.clock()
    #Create the network and trainer    
    if os.path.exists(config.get('General', 'directory')+config.get('Network', 'weights_folder')):
        print 'Loading Network'
        network = CNN(weights_folder = config.get('General', 'directory')+config.get('Network', 'weights_folder'),
                      activation = config.get('Network', 'activation'))
         
        print 'Loading Trainer'             
        network_trainer = Trainer(network, training_data.get_data(), training_data.get_labels(), training_data.get_segments(),
                                  chunk_size = config.getint('Training', 'chunk_size'),  
                                  batch_size = config.getint('Training', 'batch_size'),
                                  cost_func = config.get('Training', 'cost_func'),
                                  learning_method = config.get('Training', 'learning_method'),
                                  learning_rate = config.getfloat('Training', 'learning_rate'), 
                                  beta1 = config.getfloat('Training', 'beta1'),
                                  beta2 = config.getfloat('Training', 'beta2'), 
                                  damping = config.getfloat('Training', 'damping'), 
                                  trainer_folder = config.get('General', 'directory')+config.get('Training', 'trainer_folder'),
                                  log_interval = config.getint('Training', 'log_interval'),
                                  log_folder = config.get('General', 'directory')+config.get('Training', 'log_folder'))
    else:
        print 'Initializing Network'
        network = CNN(num_layers = config.getint('Network', 'num_layers'), 
                      num_filters = config.getint('Network', 'num_filters'), 
                      filter_size = config.getint('Network', 'filter_size'), 
                      activation = config.get('Network', 'activation'))
                      
        print 'Initializing Trainer'             
        network_trainer = Trainer(network, training_data.get_data(), training_data.get_labels(), training_data.get_segments(),
                                  chunk_size = config.getint('Training', 'chunk_size'),  
                                  batch_size = config.getint('Training', 'batch_size'),
                                  cost_func = config.get('Training', 'cost_func'),
                                  learning_method = config.get('Training', 'learning_method'),
                                  learning_rate = config.getfloat('Training', 'learning_rate'), 
                                  beta1 = config.getfloat('Training', 'beta1'),
                                  beta2 = config.getfloat('Training', 'beta2'), 
                                  damping = config.getfloat('Training', 'damping'), 
                                  log_interval = config.getint('Training', 'log_interval'),
                                  log_folder = config.get('General', 'directory')+config.get('Training', 'log_folder'))
                              
    init_time = time.clock() - starttime    
    #------------------------------------------------------------------------------
    print "Initialization = " + `init_time` + " seconds"
    
    starttime = time.clock()
    #Train the network
    print 'Training...\n'
    train_error = network_trainer.train(config.getint('Training', 'num_epochs'), 
                                        config.getboolean('Training', 'early_stop'), 
                                        config.getboolean('Training', 'print_updates'))
    total_time = time.clock() - starttime     
    #------------------------------------------------------------------------------   
    print "Total Time     =",total_time,"seconds"                   

    training_data.close()
    return train_error
Example #39
 def _do(self, data):
     load_data = LoadData(data)
     load_data.main()