Example #1
def main():
    # ------------------ config ------------------ #
    n_classes = 130
    batch_size = 10
    epochs = 25
    save_model_path = './'

    # -------------------------------------------- #

    data_transforms = {
        'train':
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
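            # ImageNet channel means / stds used by torchvision pretrained models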
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'valid':
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }

    dataset = LoadData(data_dir='./dataset/JsonData/',
                       mode='train',
                       transforms=data_transforms['train'])

    train_loaders = torch.utils.data.DataLoader(dataset,
                                                batch_size=batch_size,
                                                shuffle=True)

    dataset = LoadData(data_dir='./dataset/JsonData/',
                       mode='valid',
                       transforms=data_transforms['valid'])

    valid_loaders = torch.utils.data.DataLoader(dataset,
                                                batch_size=batch_size,
                                                shuffle=True)

    dataloaders = {'train': train_loaders, 'valid': valid_loaders}

    print('GPUs Available:', torch.cuda.is_available())
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("training on ", device)

    net = Net(n_classes=n_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    loss = torch.nn.CrossEntropyLoss()

    train(net, criterion, optimizer, loss, device, dataloaders, epochs,
          save_model_path)
Example #2
def main():
    # ------------------ config ------------------ #
    n_classes = 130
    batch_size = 2
    epochs = 30
    save_model_path = './'
    img_size = 448
    # -------------------------------------------- #

    data_transforms = {
        'train':
        transforms.Compose([
            transforms.RandomResizedCrop(img_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'valid':
        transforms.Compose([
            transforms.Resize(img_size + 50),
            transforms.CenterCrop(img_size),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }

    dataset = LoadData(data_dir='../dataset/JsonData/',
                       mode='train',
                       transforms=data_transforms['train'])

    train_loaders = torch.utils.data.DataLoader(dataset,
                                                batch_size=batch_size,
                                                shuffle=True)

    dataset = LoadData(data_dir='../dataset/JsonData/',
                       mode='valid',
                       transforms=data_transforms['valid'])

    valid_loaders = torch.utils.data.DataLoader(dataset,
                                                batch_size=batch_size,
                                                shuffle=True)

    dataloaders = {'train': train_loaders, 'valid': valid_loaders}

    print('GPUs Available:', torch.cuda.is_available())
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("training on ", device)

    net = Net(n_classes=n_classes, pretrained=False).to(device)
    optimizer = optim.SGD(net.parameters(), lr=0.08, momentum=0.9)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.1)
    loss = torch.nn.CrossEntropyLoss(reduction='sum')  # reduction='sum' replaces the deprecated size_average=False

    train_model(net, optimizer, loss, scheduler, device, dataloaders, epochs,
                save_model_path)
Example #3
    def test_corpus(self):

        with open("../data/pt_BR/nnp") as f:
            nnp = [line.rstrip() for line in f.readlines()]
        with open("../data/pt_BR/terms") as f:
            terms = [line.rstrip() for line in f.readlines()]
        with open("../data/pt_BR/patterns") as f:
            patterns = [line.rstrip() for line in f.readlines()]

        data = LoadData(['../corpus/sel1.csv', '../corpus/sel2.csv']).load()
        p = PreProcessing(nnp, terms, patterns)

        tokens = []
        for d in data.values():
            tokens += p.clean_and_stem(d)

        bow, bow_features_names = p.build_bow(tokens)
        dist = np.sum(bow.toarray(), axis=0)
        tbow = {}
        for term, count in zip(bow_features_names, dist):
            tbow[term] = count

        import operator
        with open("bow", "w") as f:
            f.write(str(len(tbow)))
            f.write(
                str(
                    sorted(tbow.items(),
                           key=operator.itemgetter(1),
                           reverse=True)))

        terms = p.compute_tfidf(data.values(), eliminate_zeros=True)
        with open("terms", "w") as f:
            f.write(str(terms))
Example #4
def index():
    endpoints_reader = LoadData()
    data = Processor()
    df_csv, df_json = endpoints_reader.read_data()
    summary_csv_info, summary_json_info = data.process_endpoints(df_csv, df_json)
    return ('Service A result: ' + summary_csv_info + '\n' +
            'Service B result: ' + summary_json_info)
Example #5
def main(mode):
    print('Loading data...')
    data = LoadData(dataset_path='dataset', images_path='dataset/images/')

    print('Creating Batch Generator...')
    batch_creator = BatchGenerator(data_dict=data.data_dict,
                                   captions_int=data.captions_int,
                                   image_addr=data.image_addr,
                                   **batch_params)

    if mode == 'train':
        print('Creating Models...')
        caption_model = CaptionLSTM(model_params=model_params,
                                    int2word=data.int2word)

        print('Starting training...')
        class_weights = calc_class_weights(data.captions_int.values)
        train(caption_model, batch_creator, class_weights, **train_params)

    elif mode == 'sample':
        print('Loading model...')
        with open('vgg_lstm.pkl', 'rb') as model_file:
            model = pickle.load(model_file)
        print('Creating sample..')
        sample(model, batch_creator, top_k=10, seq_len=16, show_image=True)

    elif mode == 'test':
        print('Loading model')
        with open('vgg_lstm.pkl', 'rb') as model_file:
            model = pickle.load(model_file)
        print('Testing model...')
        test(model, batch_creator, top_k=10, seq_len=16)
def continue_run_rand_nn(matrixA, matrixB, num_gene, num_pathway, layer1,
                         layer2, layer3, path, dir_opt, RNA_seq_filename,
                         input_num, epoch, batch_size, verbose):
    # RECONSTRUCT THE MODEL TO BE TRAINED
    model = RandNN().keras_rand_nn(matrixA, matrixB, num_gene, num_pathway,
                                   layer1, layer2, layer3)
    with open(path + '/layer_bias_list.txt', 'rb') as filebias:
        layer_bias_list = pickle.load(filebias)
    with open(path + '/layer_weight_list.txt', 'rb') as fileweight:
        layer_weight_list = pickle.load(fileweight)
    model.compile(loss='mean_squared_error',
                  optimizer='adam',
                  metrics=['mse', 'accuracy'])
    xTmp, yTmp = LoadData(dir_opt, RNA_seq_filename).load_train(0, 1)
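    # dummy one-epoch fit so the layers are built before the saved weights are restored below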
    model.fit(xTmp, yTmp, epochs=1, validation_split=1, verbose=0)
    model_layer_list = []
    num_layer = len(model.layers)
    for i in range(num_layer):
        each_layer_list = [layer_weight_list[i], layer_bias_list[i]]
        model_layer_list.append(each_layer_list)
        model.layers[i].set_weights(each_layer_list)
    # AUTO UPDATE WEIGHT
    model, history, num_layer, path = RunRandNN(
        model, dir_opt, RNA_seq_filename).train(input_num, epoch, batch_size,
                                                verbose)
    return model, history, path
 def input_drug_gene_condense():
     dir_opt = '/datainfo'
     deletion_list = []
     final_dl_input_df = pd.read_table(
         './datainfo/mid_data/final_GDSC2_dl_input.txt', delimiter=',')
     drug_map_dict, drug_dict, gene_target_num_dict = LoadData(
         dir_opt).pre_load_dict()
     target_index_list = gene_target_num_dict.values()
     drug_target_matrix = np.load(
         './datainfo/filtered_data/drug_target_matrix.npy')
     for row in final_dl_input_df.itertuples():
         drug_a = drug_map_dict[row[2]]
         cellline_name = row[1]
         # DRUG_A AND 929 TARGET GENES
         drug_a_target_list = []
         drug_index = drug_dict[drug_a]
         for target_index in target_index_list:
             if target_index == -1:
                 effect = 0
             else:
                 effect = drug_target_matrix[drug_index, target_index]
             drug_a_target_list.append(effect)
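         # drop rows whose drug has no recorded effect on any target gene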
         if all([a == 0 for a in drug_a_target_list]):
             deletion_list.append(row[0])
     print('=====================' + str(len(deletion_list)))
     zero_final_dl_input_df = final_dl_input_df.drop(
         final_dl_input_df.index[deletion_list]).reset_index(drop=True)
     zero_final_dl_input_df.to_csv(
         '.' + dir_opt + '/filtered_data/zerofinal_GDSC2_dl_input.txt',
         index=False,
         header=True)
     print(zero_final_dl_input_df)
Example #8
    def drug_gene_pathway_reform(self):
        dir_opt = self.dir_opt
        rna_df = pd.read_csv('./datainfo/filtered_data/tailed_rnaseq_fpkm_20191101.csv')
        gene_list = list(rna_df['symbol'])
        drug_map_dict, drug_dict, gene_target_num_dict = LoadData(dir_opt).pre_load_dict()
        # 929 MAPPED GENES INDEX IN [drug_target_matrix]
        target_index_list = gene_target_num_dict.values()
        # 24 DRUGS IN DEEP LEARNING TASK
        zero_final_dl_input_df = pd.read_table('./datainfo/filtered_data/zerofinal_GDSC2_dl_input.txt', delimiter = ',')
        zero_final_drug_list = []
        for drug in zero_final_dl_input_df['DRUG_NAME']:
            if drug not in zero_final_drug_list:
                zero_final_drug_list.append(drug)
        zero_final_drug_list = sorted(zero_final_drug_list)
        # 24 MAPPED DRUGS IN DEEP LEARNING TASK
        mapped_drug_list = []
        for zero_drug in zero_final_drug_list:
            mapped_drug_list.append(drug_map_dict[zero_drug])
        drug_target_matrix = np.load('.' + dir_opt + '/filtered_data/drug_target_matrix.npy')
        # FIND DRUGS CAN TARGET ON GENES
        multi_drug_list = []
        for target_index in target_index_list:
            temp_drug_list = []
            if target_index == -1:
                temp_drug_list = ['NaN']
                multi_drug_list.append(temp_drug_list)
            else:
                for mapped_drug in mapped_drug_list:
                    drug_index = drug_dict[mapped_drug]
                    effect = drug_target_matrix[drug_index, target_index]
                    if effect == 1: temp_drug_list.append(mapped_drug)
                if len(temp_drug_list) == 0: temp_drug_list = ['NaN']
                multi_drug_list.append(temp_drug_list)
        # CONVERT EACH GENES TARGETED DRUGS TO DATAFRAME
        drug_gene = {'Drugs': multi_drug_list, 'Genes': gene_list}
        drug_gene_df = pd.DataFrame(drug_gene, columns=['Drugs','Genes'])
        drug_gene_df.to_csv('./datainfo/filtered_data/drug_gene.csv', index = False, header = True)

        # ADD PATHWAYS TO CORRESPONDING GENES
        gene_pathway_df = pd.read_csv('./datainfo/filtered_data/Tailed_Selected_Kegg_Pathways2.csv')
        pathway_name_list = list(gene_pathway_df.columns)[1:]
        multi_pathway_list = []
        # import pdb; pdb.set_trace()
        for row in gene_pathway_df.itertuples():
            temp_pathway_list = []
            for index in np.arange(2, 48):
                if row[index] == 1: 
                    temp_pathway_list.append(pathway_name_list[index - 2])
            if len(temp_pathway_list) == 0:
                temp_pathway_list = ['NaN']
                print(row[1])
            multi_pathway_list.append(temp_pathway_list)
        # print(multi_pathway_list)
        # print(len(multi_pathway_list))
        # CONVERT EACH GENES TARGETED DRUGS/ CONNECTION TO PATHWAYS TO DATAFRAME
        drug_gene_pathway = {'Drugs': multi_drug_list, 'Genes': gene_list, 'Pathways': multi_pathway_list}
        drug_gene_pathway_df = pd.DataFrame(drug_gene_pathway, columns=['Drugs', 'Genes', 'Pathways'])
        drug_gene_pathway_df.to_csv('./datainfo/filtered_data/drug_gene_pathway.csv', index = False, header = True)
        print(drug_gene_pathway_df)
Example #9
 def init_with_data(self,
                    is_load=True,
                    max_outgoing=10,
                    max_airports=500,
                    silent=False):
     ld = LoadData(self, is_load, max_outgoing, max_airports, silent)
     ld.load()
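      # print a summary of the loaded data unless silent mode was requested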
     silent or self.print_info()
Example #10
def evaluate_accuracy():

    torch.backends.cudnn.deterministic = True
    device = torch.device("cuda")

    print("CUDA visible devices: " + str(torch.cuda.device_count()))
    print("CUDA Device Name: " + str(torch.cuda.get_device_name(device)))

    # Create test dataset loader
    test_dataset = LoadData(dataset_dir, TEST_SIZE, 2.0, test=True)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=1,
                             shuffle=False,
                             num_workers=1,
                             pin_memory=True,
                             drop_last=False)

    # Define the model architecture and restore it from the .pth file, e.g.:

    model = PyNET(level=0, instance_norm=True,
                  instance_norm_level_1=True).to(device)
    model = torch.nn.DataParallel(model)
    model.load_state_dict(torch.load("models/original/pynet_level_0.pth"),
                          strict=True)

    # Define the losses

    MSE_loss = torch.nn.MSELoss()
    MS_SSIM = MSSSIM()

    loss_psnr = 0.0
    loss_msssim = 0.0

    model.eval()
    with torch.no_grad():

        test_iter = iter(test_loader)
        for j in range(len(test_loader)):

            x, y = next(test_iter)
            x = x.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)

            # Process raw images with your model:
            enhanced = model(x)

            # Compute losses
            loss_mse_temp = MSE_loss(enhanced, y).item()
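            # PSNR for images scaled to [0, 1]: 20 * log10(MAX / sqrt(MSE)) with MAX = 1.0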
            loss_psnr += 20 * math.log10(1.0 / math.sqrt(loss_mse_temp))

            loss_msssim += MS_SSIM(y, enhanced).detach().cpu().numpy()

    loss_psnr = loss_psnr / TEST_SIZE
    loss_msssim = loss_msssim / TEST_SIZE

    output_logs = "PSNR: %.4g, MS-SSIM: %.4g\n" % (loss_psnr, loss_msssim)
    print(output_logs)
Example #11
 def test_load_data_full_csv(self):
    endpoints_reader = LoadData()
    json_link = '[{"date":"22-01-2021", "impressions":1376}, \
                  {"date":"21-01-2021","impressions":1906}, \
                  {"date":"20-01-2021","impressions":2818}, \
                  {"date":"19-01-2021","impressions":1024}]'
    csv_link = 'mock_unittest.csv'
    csv_file, _ = endpoints_reader.read_data(csv_link, json_link)
    self.assertEqual(csv_file.shape[0], 2)
Example #12
def start_rnn():
    load_data = LoadData()
    process_data = ProcessDataRNN(sales=load_data.make_dataset(
        categorize=CATEGORIZE, start=START, upper=UPPER))
    # xt, yt, xv, yv, xe, ye = process_data.run()
    xv, yv, xe, ye = process_data.run()

    if not ONLY_LOAD_MODEL:
        ckpt = ModelCheckpoint(OUTPUT_MODEL,
                               monitor='val_loss',
                               verbose=1,
                               save_best_only=True,
                               mode='min')
        reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                      factor=0.2,
                                      patience=5,
                                      min_lr=MIN_LR,
                                      verbose=1)
        es = EarlyStopping(monitor='val_loss', patience=3)
        # net = make_model(len(process_data.FEATS))
        net = make_model_rnn(process_data.Z.shape[2])
        print(net.summary())

        n_slices = LEN // 28
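        # breakpoints every 28 steps back from LEN; only windows with at least max(LAGS) + 28 history are kept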
        brks = np.array(
            [LEN - (n_slices - i) * 28 for i in range(n_slices + 1)])
        brks = brks[brks >= max(LAGS) + 28]
        print("#" * 30)
        print(LEN, process_data.C.shape, process_data.Z.shape)
        print(brks)
        print(process_data.C.min(), process_data.ys.min(),
              process_data.Z[:, 66:].min())
        print("#" * 30)
        net.fit_generator(DataGenerator(
            (process_data.C, process_data.Z, process_data.ys),
            brks[:-1],
            batch_size=BATCH_SIZE),
                          epochs=EPOCH,
                          validation_data=(xv, yv),
                          callbacks=[ckpt, reduce_lr, es])

        # net.fit(xt, yt, batch_size=BATCH_SIZE, epochs=EPOCH, validation_data=(xv, yv), callbacks=[ckpt, reduce_lr, es])

    # nett = make_model(len(process_data.FEATS))
    nett = make_model_rnn(process_data.Z.shape[2])
    nett.load_weights(OUTPUT_MODEL)

    pv = nett.predict(xv, batch_size=BATCH_SIZE, verbose=1)
    pe = nett.predict(xe, batch_size=BATCH_SIZE, verbose=1)
    print("Eva result:", nett.evaluate(xv, yv, batch_size=BATCH_SIZE))

    # pv = pv.reshape((-1, 28, 9))
    # pe = pe.reshape((-1, 28, 9))
    sv = process_data.sv.reshape((-1, 28))
    se = process_data.se.reshape((-1, 28))
    # Yv = yv.reshape((-1, 28))
    return process_data, yv, pv, pe, sv, se
Example #13
 def test_processor(self):
   endpoints_reader = LoadData()
   data = Processor()
   json_link = '[{"date":"22-01-2021", "impressions":1376}, \
                  {"date":"21-01-2021","impressions":1906}, \
                  {"date":"20-01-2021","impressions":2818}, \
                  {"date":"19-01-2021","impressions":1024}]'
   csv_link = 'mock_unittest.csv'
   df_csv, df_json = endpoints_reader.read_data(csv_link, json_link)
   summary_csv_info, summary_json_info = data.process_endpoints(df_csv, df_json)
   self.assertEqual(str(summary_csv_info), '{"mean": 1732.0, "sum": 3464}')
   self.assertEqual(str(summary_json_info), '{"mean": 1781.0, "sum": 7124}')
def main():

    if len(sys.argv) == 2:
        img_path = sys.argv[1]
    else:
        img_path = DEFAULT_LIVE_IMG_PATH

    prediction = Predict(img_path)

    p_clothing = []
    p_body_type = []
    probability = None
    # If there was an update to the live img data location
    # Get live image/vid data
    live_imgs = LoadData(DEFAULT_LIVE_IMG_PATH, (IMAGE_WIDTH, IMAGE_HEIGHT))
    live_frame_dataset = live_imgs.get_last_n_frames(n=1)
    # Get the initial photos, labels, and location of individuals
    initial_people = LoadData(DEFAULT_INDIVIDUAL_IMG_PATH)
    person_dataset = initial_people.get_all_frames()

    # Given an individual's clothing hist and body type:
    # Compare that to the unknown body type and clothing histogram
    clothing_prob = ClothingEM()
    unkown_clothing_prob = ClothingEM(live_frame_dataset)
    unkown_clothing_dist = unkown_clothing_prob.clothing_distribution()
    body_prob = BodyTypeClassifier()

    for person in person_dataset:
        clothing_prob.add_frames(person)
        individual_clothing_dist = clothing_prob.clothing_distribution()
        # Compare each individual distribution with the unknown distribution and add it to arrays
        clothing_prediction = prediction.compare_individual_w_clothing(
            unkown_clothing_dist, individual_clothing_dist)

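        # the body-type distributions below (unkown_body_distr, individual_body_distr)
        # are not computed in this snippet; presumably they come from body_prob above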
        body_prediction = prediction.compare_individual_w_body(
            unkown_body_distr, individual_body_distr)
        p_clothing.append({person: clothing_prediction})
        p_body_type.append({person: body_prediction})
def manual_test_rand_nn(matrixA, matrixB, num_gene, num_pathway, layer1, layer2, layer3, path, dir_opt, learning_rate):
    # MANUAL REBUILD THE MODEL
    input_model, gene_model, pathway_model, model = build_rand_nn(matrixA, matrixB, num_gene, num_pathway, layer1, layer2, layer3)
    with open(path + '/layer_list.txt', 'rb') as filelayer:
        layer_list = pickle.load(filelayer)
    model.compile(loss='mean_squared_error',
                    optimizer=Adam(lr=learning_rate),
                    metrics=['mse', 'accuracy']) 
    xTmp, yTmp = LoadData(dir_opt).load_train(0, 1)
    model.fit(xTmp, yTmp, epochs = 1, validation_split = 1, verbose = 0)
    num_layer = len(model.layers)
    for i in range(num_layer):
        model.get_layer(index = i).set_weights(layer_list[i])
    # PREDICT MODEL USING [xTe, yTe]
    verbose = 1
    y_pred, score = RunRandNN(model, dir_opt).test(verbose, path)
Example #16
def rna_proteomic_mismatch_probabilities(train=True):
    # Train siamese network
    data = LoadData()
    pro_data = data.proteomic
    rna_data = data.rna

    prot_x = pd.concat([pro_data] * 3 + [data.test_proteomic] * 3)
    shuffled_rna = rna_data.sample(frac=1)
    rna_x = pd.concat([
        rna_data, shuffled_rna,
        rna_data.sample(frac=1), data.test_rna,
        data.test_rna.sample(frac=1),
        data.test_rna.sample(frac=1)
    ])
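    # 1.0 = matched proteomic/RNA pairs, 0.0 = shuffled (mismatched) pairs; assumes 80 samples per split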
    labels = [1.0] * 80 + [0.0] * 160 + [1.0] * 80 + [0.0] * 160

    network = SiameseNet([(pro_data.shape[-1], ), (rna_data.shape[-1], )])
    network.fit([prot_x, rna_x],
                labels,
                epochs=100,
                batch_size=5,
                verbose=False)

    # Calculate pairwise probabilities
    if not train:
        pro_data = data.test_proteomic
        rna_data = data.test_rna

    vals = {
        "Proteomic": [],
        "RNA": [],
        "Probability": [],
    }
    for i, x in pro_data.iterrows():
        for j, y in rna_data.iterrows():
            vals['Proteomic'].append(x.name)
            vals['RNA'].append(y.name)
            vals['Probability'].append(network.predict([[x], [y]])[0][0])

    probs = pd.DataFrame(vals)
    if train:
        order = data.clinical.index.tolist()
    else:
        order = data.test_clinical.index.tolist()
    probs = probs.pivot(index='RNA', columns='Proteomic',
                        values='Probability')[order].reindex(order)
    return probs
Example #17
def verify_input():
    # get the data from input folder
    val1 = v1.get()
    chem_type = get_chemType(val1)
    chem_file = get_chemFile(chem_type)
    region_file = CUR_PATH + './Input/Region.xlsx'
    release_file = CUR_PATH + './Input/ChemRelease.xlsx'
    start_date_temp = '2005 1 1'
    end_date_temp = '2005 12 31'

    if chem_type != 'Nanomaterial':
        load_data_nonNano = LoadData(chem_type, chem_file, region_file,
                                     release_file, start_date_temp,
                                     end_date_temp)
        chem_params, presence, env, climate, bgConc, release, release_scenario = load_data_nonNano.run_loadData(
        )
    else:
        time, presence, env, climate, bgConc, chem_params, release, release_scenario = load_data(
            region_file, release_file, chem_file, start_date_temp,
            end_date_temp)

    chem_txt4 = Label(text='Chemical:', bg=bg_col).place(x=140, y=170)
    chem_name = StringVar(window, value=chem_params['name'])
    entry1 = Entry(window,
                   textvariable=chem_name,
                   bg='lightgrey',
                   state='disabled')
    entry1.place(x=220, y=170, width=250)

    chem_txt5 = Label(text='Region:', bg=bg_col).place(x=140, y=200)
    region_name = StringVar(window, value=env['name'])
    entry2 = Entry(window,
                   textvariable=region_name,
                   bg='lightgrey',
                   state='disabled')
    entry2.place(x=220, y=200, width=250)

    chem_txt6 = Label(text='Release:', bg=bg_col).place(x=140, y=230)
    release_scenario = StringVar(window, value=release_scenario)
    entry3 = Entry(window,
                   textvariable=release_scenario,
                   bg='lightgrey',
                   state='disabled')
    entry3.place(x=220, y=230, width=250)

    return chem_params, env, release_scenario
def continue_run_rand_nn(matrixA, matrixB, num_gene, num_pathway, layer1, layer2, layer3, path,
                dir_opt, input_num, epoch, batch_size, verbose, learning_rate, end_epoch):
    # REBUILD DECOMPOSED MODEL FROM SAVED MODEL
    input_model, gene_model, pathway_model, model = build_rand_nn(matrixA, matrixB, num_gene, num_pathway, layer1, layer2, layer3)
    with open(path + '/layer_list.txt', 'rb') as filelayer:
        layer_list = pickle.load(filelayer)
    model.compile(loss='mean_squared_error',
                    optimizer=Adam(lr=learning_rate),
                    metrics=['mse', 'accuracy']) 
    xTmp, yTmp = LoadData(dir_opt).load_train(0, 1)
    model.fit(xTmp, yTmp, epochs = 1, validation_split = 1, verbose = 0)
    num_layer = len(model.layers)
    for i in range(num_layer):
        model.get_layer(index = i).set_weights(layer_list[i])
    # RUN MODEL (AUTO UPDATE WEIGHT)    
    model, history, path = run_rand_nn(model, dir_opt, matrixA, matrixB, input_num, epoch, batch_size, verbose, learning_rate, end_epoch)
    return model, history, path
Example #19
 def test(self, verbose, path):
     model = self.model
     dir_opt = self.dir_opt
     RNA_seq_filename = self.RNA_seq_filename
     xTe, yTe = LoadData(dir_opt, RNA_seq_filename).load_test()
     # TEST OUTPUT PRED 
     y_pred = model.predict(xTe)
     y_pred_list = [item for elem in y_pred for item in elem]
     score = model.evaluate(xTe, yTe, verbose = verbose)
     final_test_input_df = pd.read_csv('.' + dir_opt + '/filtered_data/TestInput.txt', delimiter = ',')
     final_row, final_col = final_test_input_df.shape
     final_test_input_df.insert(final_col, 'Pred Score', y_pred_list, True)
     final_test_input_df.to_csv(path + '/PredTestInput.txt', index = False, header = True)
     # ANALYSE PEARSON CORR
     test_pearson = final_test_input_df.corr(method = 'pearson')
     print(score)
     print(test_pearson)
     return y_pred, score
Example #20
def clinical_labels_dict(train=True):
    data = LoadData()

    def clinical_to_int(row):
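        # encode the clinical label as an integer 0-3: +2 for MSI-High, +1 for Female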
        output = 0
        if row.msi == "MSI-High":
            output += 2
        if row.gender == "Female":
            output += 1
        return output

    if train:
        clin_data = data.clinical
    else:
        clin_data = data.test_clinical

    labels = clin_data.apply(clinical_to_int, axis="columns")
    return {sample: label for sample, label in zip(clin_data.index, labels)}
Example #21
def start_cnn():
    load_data = LoadData()
    process_data = ProcessDataCNN(sales=load_data.make_dataset(
        categorize=CATEGORIZE, start=START, upper=UPPER))
    xt, yt, xv, yv, xe, ye = process_data.run()

    if not ONLY_LOAD_MODEL:
        ckpt = ModelCheckpoint(OUTPUT_MODEL,
                               monitor='val_loss',
                               verbose=1,
                               save_best_only=True,
                               mode='min')
        reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                      factor=0.2,
                                      patience=5,
                                      min_lr=MIN_LR,
                                      verbose=1)
        es = EarlyStopping(monitor='val_loss', patience=3)
        net = make_model_cnn(len(process_data.FEATS))
        plot_model(net, to_file='model.png')
        print(net.summary())
        # exit()

        net.fit(xt,
                yt,
                batch_size=BATCH_SIZE,
                epochs=EPOCH,
                validation_data=(xv, yv),
                callbacks=[reduce_lr, ckpt, es])

    nett = make_model_cnn(len(process_data.FEATS))  # same feature count as the trained model above
    nett.load_weights(OUTPUT_MODEL)

    pv = nett.predict(xv, batch_size=BATCH_SIZE, verbose=1)
    pe = nett.predict(xe, batch_size=BATCH_SIZE, verbose=1)
    print("Eva result:", nett.evaluate(xv, yv, batch_size=BATCH_SIZE))

    pv = pv.reshape((-1, 28, 9))
    pe = pe.reshape((-1, 28, 9))
    sv = process_data.sv.reshape((-1, 28))
    se = process_data.se.reshape((-1, 28))
    Yv = yv.reshape((-1, 28))

    return process_data, Yv, pv, pe, sv, se
Example #22
def clinical_probabilities(train=True, learner=learner_functions.train_rf):
    """Get the probabilities of each clinical class for each sample for
    proteomic and rna data.

    Returns (proteomic_probabilities_df, rna_probabilities_df)
    """
    data = LoadData()

    def clinical_to_int(row):
        output = 0
        if row.msi == "MSI-High":
            output += 2
        if row.gender == "Female":
            output += 1
        return output

    train_labels = pd.concat([data.clinical,
                              data.test_clinical]).apply(clinical_to_int,
                                                         axis="columns")
    train_proteomic = pd.concat([data.proteomic, data.test_proteomic])
    train_rna = pd.concat([data.rna, data.test_rna])

    model = learner(train_proteomic, train_labels)
    if train:
        proteomic_probabilities_df = pd.DataFrame(model[0].predict_proba(
            data.proteomic))
        proteomic_probabilities_df['sample'] = data.clinical.index
    else:
        proteomic_probabilities_df = pd.DataFrame(model[0].predict_proba(
            data.test_proteomic))
        proteomic_probabilities_df['sample'] = data.test_clinical.index
    proteomic_probabilities_df = proteomic_probabilities_df.set_index('sample')

    model = learner(train_rna, train_labels)
    if train:
        rna_probabilities_df = pd.DataFrame(model[0].predict_proba(data.rna))
        rna_probabilities_df['sample'] = data.clinical.index
    else:
        rna_probabilities_df = pd.DataFrame(model[0].predict_proba(
            data.test_rna))
        rna_probabilities_df['sample'] = data.test_clinical.index
    rna_probabilities_df = rna_probabilities_df.set_index('sample')

    return proteomic_probabilities_df, rna_probabilities_df
 def input_drug_gene_condense(self, RNA_seq_filename):
     dir_opt = self.dir_opt
     deletion_list = []
     final_dl_input_df = pd.read_table(
         '.' + dir_opt + '/mid_data/FinalDeepLearningInput.txt',
         delimiter=',')
     drug_map_dict, cellline_map_dict, drug_dict, gene_target_num_dict = LoadData(
         dir_opt, RNA_seq_filename).pre_load_dict()
     target_index_list = gene_target_num_dict.values()
     drug_target_matrix = np.load('.' + dir_opt +
                                  '/filtered_data/drug_target_matrix.npy')
     for row in final_dl_input_df.itertuples():
         drug_a = drug_map_dict[row[1]]
         drug_b = drug_map_dict[row[2]]
         cellline_name = cellline_map_dict[row[3]]
         # DRUG_A AND 1130 TARGET GENES
         drug_a_target_list = []
         drug_index = drug_dict[drug_a]
         for target_index in target_index_list:
             if target_index == -1:
                 effect = 0
             else:
                 effect = drug_target_matrix[drug_index, target_index]
             drug_a_target_list.append(effect)
         # DRUG_B AND 1130 TARGET GENES
         drug_b_target_list = []
         drug_index = drug_dict[drug_b]
         for target_index in target_index_list:
             if target_index == -1:
                 effect = 0
             else:
                 effect = drug_target_matrix[drug_index, target_index]
             drug_b_target_list.append(effect)
         if all([a == 0 for a in drug_a_target_list]) or all(
             [b == 0 for b in drug_b_target_list]):
             deletion_list.append(row[0])
     zero_final_dl_input_df = final_dl_input_df.drop(
         final_dl_input_df.index[deletion_list]).reset_index(drop=True)
     zero_final_dl_input_df.to_csv(
         '.' + dir_opt + '/filtered_data/ZeroFinalDeepLearningInput.txt',
         index=False,
         header=True)
     print(zero_final_dl_input_df)
    def test_load_data_expect_summean_when_links_are_valid(self):

       endpoints_reader = LoadData()
       data = Processor()
       json_link = '[{"date":"22-01-2021","impressions": 1376}, \
       {"date":"21-01-2021","impressions": 1906},{"date":"20-01-2021","impressions": 2818},\
       {"date":"19-01-2021","impressions": 1024}, \
       {"date":"18-01-2021","impressions": 646},{"date":"17-01-2021","impressions": 2885}, \
       {"date":"16-01-2021","impressions": 1889},{"date":"15-01-2021","impressions": 1534}, \
       {"date":"14-01-2021","impressions": 995},{"date":"13-01-2021","impressions": 1251}, \
       {"date":"12-01-2021","impressions": 2062},{"date":"11-01-2021","impressions": 1204}, \
       {"date":"10-01-2021","impressions": 2030},{"date":"09-01-2021","impressions": 1166}, \
       {"date":"08-01-2021","impressions": 2025},{"date":"07-01-2021","impressions": 1221}, \
       {"date":"06-01-2021","impressions": 2018},{"date":"05-01-2021","impressions": 2484}, \
       {"date":"04-01-2021","impressions": 1145},{"date":"03-01-2021","impressions": 2686}, \
       {"date":"02-01-2021","impressions": 2186},{"date":"01-01-2021","impressions": 1527}, \
       {"date":"31-12-2020","impressions": 1710},{"date":"30-12-2020","impressions": 1343}, \
       {"date":"29-12-2020","impressions": 2466},{"date":"28-12-2020","impressions": 952}, \
       {"date":"27-12-2020","impressions": 532},{"date":"26-12-2020","impressions": 2690}, \
       {"date":"25-12-2020","impressions": 2428},{"date":"24-12-2020","impressions": 602}, \
       {"date":"23-12-2020","impressions": 995},{"date":"22-12-2020","impressions": 615}, \
       {"date":"21-12-2020","impressions": 2055},{"date":"20-12-2020","impressions": 1337}, \
       {"date":"19-12-2020","impressions": 1824},{"date":"18-12-2020","impressions": 1645}, \
       {"date":"17-12-2020","impressions": 2655},{"date":"16-12-2020","impressions": 2619}, \
       {"date":"15-12-2020","impressions": 1189},{"date":"14-12-2020","impressions": 2391}, \
       {"date":"13-12-2020","impressions": 1612},{"date":"12-12-2020","impressions": 510}, \
       {"date":"11-12-2020","impressions": 2655},{"date":"10-12-2020","impressions": 2029}, \
       {"date":"09-12-2020","impressions": 2899},{"date":"08-12-2020","impressions": 1170}, \
       {"date":"07-12-2020","impressions": 526},{"date":"06-12-2020","impressions": 2092}, \
       {"date":"05-12-2020","impressions": 1453},{"date":"04-12-2020","impressions": 738}, \
       {"date":"03-12-2020","impressions": 700},{"date":"02-12-2020","impressions": 1485}, \
       {"date":"01-12-2020","impressions": 1571},{"date":"30-11-2020","impressions": 2106}, \
       {"date":"29-11-2020","impressions": 2646},{"date":"28-11-2020","impressions": 1092}, \
       {"date":"27-11-2020","impressions": 1495},{"date":"26-11-2020","impressions": 2356}, \
       {"date":"25-11-2020","impressions": 1474},{"date":"24-11-2020","impressions": 1431}, \
       {"date":"23-11-2020","impressions": 1359},{"date":"22-11-2020","impressions": 1420}]'
       csv_link = 'ba026992-281a-42a6-8447-ae1c8a04106e.csv'
       csv_file, json_file = endpoints_reader.read_data(csv_link, json_link)
       summary_csv_info, summary_json_info = data.process_endpoints(csv_file, json_file)
       print(summary_csv_info, summary_json_info)
       self.assertEqual(str(summary_csv_info), '{"mean": 1781.85, "sum": 110475}')
       self.assertEqual(str(summary_json_info), '{"mean": 1660.4, "sum": 102945}')
Example #25
def main(mode):
    dataset_path = 'data'
    data = LoadData(dataset_path=dataset_path, **data_params)

    print('Creating Batch Generator...')
    batch_gen = BatchGenerator(data_dict=data.data_dict,
                               label_dict=data.label_dict,
                               **batch_params)

    if mode == 'train':
        train(vocabs=[data.word2int, data.int2word],
              batch_gen=batch_gen,
              train_params=train_params,
              model_params=model_params)

    elif mode == 'test':
        print('Loading model')
        with open('results/seq2seq.pkl', 'rb') as model_file:
            model = pkl.load(model_file)
        print('Testing model...')
        test(model, data.int2word, batch_gen)
Example #26
 def zero_final_drug_count():
     zero_final_dl_input_df = pd.read_table('./datainfo/filtered_data/zerofinal_GDSC2_dl_input.txt', delimiter = ',')
     zero_final_drug_list = []
     for drug in zero_final_dl_input_df['DRUG_NAME']:
         if drug not in zero_final_drug_list:
             zero_final_drug_list.append(drug)
     zero_final_drug_list = sorted(zero_final_drug_list)
     print(zero_final_drug_list)
     print(len(zero_final_drug_list))
     # Count the Number of Drugs Intersection Between [dl_input, drugBank]
     dir_opt = '/datainfo'
     drug_map_dict, drug_dict, gene_target_num_dict = LoadData(dir_opt).pre_load_dict()
     count = 0
     anti_count = 0
     for key, value in drug_map_dict.items():
         if type(value) is str:
             count = count + 1
          elif math.isnan(value):
             anti_count = anti_count + 1
     print(count)
     print(anti_count)
Example #27
    def test_cleanup(self):

        with open("../data/pt_BR/nnp") as f:
            nnp = [line.rstrip() for line in f.readlines()]
        with open("../data/pt_BR/terms") as f:
            terms = [line.rstrip() for line in f.readlines()]
        with open("../data/pt_BR/patterns") as f:
            patterns = [line.rstrip() for line in f.readlines()]

        data = LoadData(['../corpus/sel1.csv', '../corpus/sel2.csv'],
                        ['D', 'C']).load()
        p = CleanupData(nnp, terms, patterns)
        new_data = p.clean(data)

        with open("results/clean.txt", "w") as f:
            f.write("\n".join("%s;%s;%s" % (d.identifier, d.text, d.status)
                              for d in new_data))

        for i, d in enumerate(new_data):
            dirname = "conectado" if d.status == "C" else "desconectado"
            filename = "results/%s/clt_%s.txt" % (dirname, i)
            with open(filename, "w") as f:
                f.write(d.text)
Example #28
def main(tablename,
         use_json=0,
         database='tnt',
         start_date=0,
         end_date=0,
         batch_size=30):
    try:
        last_version = get_last_version(tablename)
        file_path = "{}/data/{}_v{}.json".format(
            Settings.BASE_DIR, tablename,
            last_version) if use_json == 0 else use_json
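        # use_json == 0 -> read the latest versioned JSON dump; any other value is treated as an explicit file path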
        source_conn = Connection(
            'mo4jo').connect_mssql() if use_json is None else None

        target_conn = Connection(database).connect_mysql()

        start_date = start_date if start_date is not None else '2012-01-01'
        batch_size = Settings.DATABASE_CONFIG['tnt']['batch_size']
        now = datetime.utcnow().date()

        with open("{}/data/mo4jo_pac_clients_mapping.json".format(
                Settings.BASE_DIR), "r") as mapping_file:
            mo4jo_pac_clients_mapping = json.load(mapping_file)
        end_date = end_date if end_date is not None else now
        # entity_json = PullData(source_conn, tablename, file_path).get_data(kwargs={'start_date':start_date,'end_date':end_date})
        entity_json = PullData(source_conn, tablename, file_path).get_data()
        if isinstance(end_date, str):
            end_date = datetime.strptime(end_date, '%Y-%m-%d').date()

        LoadData(target_conn, tablename, entity_json, start_date, end_date,
                 batch_size).load_data(mo4jo_pac_clients_mapping)
        # LoadData(target_conn, tablename, entity_json).load_data(mo4jo_pac_clients_mapping)
    except Exception as e:
        import traceback
        print(traceback.format_exc())
def manual_test_rand_nn(matrixA, matrixB, num_gene, num_pathway, layer1,
                        layer2, layer3, path, dir_opt, RNA_seq_filename):
    # RECONSTRUCT THE TEST MODEL
    model = RandNN().keras_rand_nn(matrixA, matrixB, num_gene, num_pathway,
                                   layer1, layer2, layer3)
    with open(path + '/layer_bias_list.txt', 'rb') as filebias:
        layer_bias_list = pickle.load(filebias)
    with open(path + '/layer_weight_list.txt', 'rb') as fileweight:
        layer_weight_list = pickle.load(fileweight)
    model.compile(loss='mean_squared_error',
                  optimizer='adam',
                  metrics=['mse', 'accuracy'])
    xTmp, yTmp = LoadData(dir_opt, RNA_seq_filename).load_train(0, 1)
    model.fit(xTmp, yTmp, epochs=1, validation_split=1, verbose=0)
    model_layer_list = []
    num_layer = len(model.layers)
    for i in range(num_layer):
        each_layer_list = [layer_weight_list[i], layer_bias_list[i]]
        model_layer_list.append(each_layer_list)
        model.layers[i].set_weights(each_layer_list)
    # PREDICT MODEL USING [xTe, yTe]
    verbose = 1
    y_pred, score = RunRandNN(model, dir_opt,
                              RNA_seq_filename).test(verbose, path)
Example #30
File: test.py  Project: Nao-Y1996/ros-unity
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
from load_data import LoadData
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import csv

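# small debug helper: print the type and shape of an array-like object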
def s(_data):
    print(type(_data), np.shape(_data))


load_data = LoadData()
load_data.load()

# Load the data
# iris = datasets.load_iris()
# data = iris.data
# target = iris.target
target = load_data.target
data = load_data.data
with open('_target.csv', 'w') as f:
    writer = csv.writer(f)
    for i in target:
        writer.writerow([i])
with open('_data.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerows(data)