Example #1
0
    def CigNet_prediction(self):
        """Score genes with the fitted predictor and attach p/q-values.

        Scales ``self.real_table`` with the fitted ``self.scaler``, computes
        decision-function distances and class probabilities with
        ``self.predictor``, then sweeps the normal location parameter ``loc``
        until fewer than 65% of genes pass the BH-adjusted q-value < 0.05
        cutoff.  Finally restricts the result to candidates appearing in
        ``self.CellNet['from']`` and ranks them by distance.

        Returns:
            pd.DataFrame indexed by the candidate genes, with columns
            ``distance``, ``non-driver_prob``, ``driver_prob``, ``p_value``,
            ``q_value`` and ``Rank``.
        """
        scaled = self.scaler.transform(self.real_table)
        real_table = pd.DataFrame(scaled,
                                  index=self.real_table.index,
                                  columns=self.real_table.columns)

        distance = predict_decision(self.predictor, real_table)
        proba = predict_proba(self.predictor, real_table)
        result_df = pd.DataFrame(np.concatenate([distance, proba], axis=1),
                                 index=real_table.index)
        result_df.columns = ['distance', 'non-driver_prob', 'driver_prob']

        stats = importr('stats')
        # BUG FIX: the original grid contained "-1.25 - 1" (a missing comma,
        # evaluating to the single value -2.25) where the two grid points
        # -1.25 and -1 were intended, matching the 0.25 spacing elsewhere.
        for loc in [-2, -1.75, -1.5, -1.25, -1, -0.75, -0.5, -0.25,
                    0, 0.25, 0.5, 1]:
            result_df['p_value'] = 1 - norm.cdf(result_df['distance'], loc=loc)
            result_df['q_value'] = stats.p_adjust(
                FloatVector(result_df["p_value"].tolist()), method='BH')
            # Stop at the first loc where under 65% of genes are significant
            # at FDR 0.05.
            sig_fraction = (result_df[result_df['q_value'] < 0.05].shape[0]
                            * 1. / result_df.shape[0])
            if sig_fraction < 0.65:
                break
        candidate_list = self.CellNet['from'].unique()
        result_df = result_df[result_df.index.isin(candidate_list)]
        result_df = result_df.sort_values(by='distance', ascending=False)
        result_df['Rank'] = result_df['distance'].rank(axis=0, ascending=False)
        return result_df
Example #2
0
def hello():
    """Flask handler: score a posted review and return its percentage.

    Responds with a 400 JSON error when the form lacks a ``review`` field;
    otherwise returns ``ceil(probability * 100)`` as JSON, where the
    probability comes from ``predict_proba`` on the review text.
    """
    # Guard clause: reject requests without the expected form field.
    if 'review' not in request.form:
        return jsonify({'error': 'no review in body'}), 400

    review = request.form['review']
    proba = predict_proba(review, model, tokenizer, max_length)[0, 0]
    return jsonify(ceil(proba * 100))
Example #3
0
    def predict(self):
        """Fit a CIG predictor on the best feature path and score all genes.

        Selects active and suppressive histone-mark feature columns, runs
        the ``optimization`` path search, trains a logistic-regression
        predictor on the best-scoring path, scores ``self.real_table``, then
        sweeps the normal location parameter until fewer than 600 genes pass
        BH-adjusted FDR < 0.05.  The ranked result is stored in
        ``self.prediction`` (columns: distance, CIG_prob, p_value, FDR, rank).
        """
        def _mark_columns(marks):
            # Columns containing any of *marks* (case-insensitive), excluding
            # 'single'-replicate columns.
            return [c for c in self.real_table.columns
                    if any(m in c.lower() for m in marks)
                    and c.find('single') == -1]

        active_columns = _mark_columns(
            ('h3k4me3', 'h3k27ac', 'h3k4me1', 'h3k79me2', 'rna'))
        suppressive_columns = _mark_columns(('h3k27me3', 'h3k9me3'))

        paths = optimization(self.gene_meta_df, self.training_table,
                             self.real_table, active_columns,
                             suppressive_columns, self.preknown)

        # Pick the path with the highest optimization score.
        best_path = None
        best_score = None
        for cur_score, cur_path in paths.values():
            # BUG FIX: the original compared `cur_score > None`, which raises
            # TypeError on Python 3; seed the best with the first entry.
            if best_score is None or cur_score > best_score:
                best_score = cur_score
                best_path = cur_path

        train_df = self.training_table[best_path].copy()
        train_df = label_label(train_df, self.gene_meta_df)
        scaler = center_normalization(train_df.iloc[:, :-1])
        train_df.iloc[:, :-1] = preprocessing_table(scaler, train_df.iloc[:, :-1])
        predictor = predict_logisticregression(train_df.iloc[:, :-1],
                                               train_df.iloc[:, -1], c=.2)

        # Apply the same scaler fitted on training data to the real table.
        real_table = self.real_table[best_path].copy()
        real_table = preprocessing_table(scaler, real_table)

        result = predict_decision(predictor, real_table)
        result2 = predict_proba(predictor, real_table)

        df = pd.DataFrame(np.concatenate([result, result2], axis=1),
                          index=real_table.index)
        df.columns = ['distance', 'non-CIG_prob', 'CIG_prob']
        del df['non-CIG_prob']

        # Sweep the null-distribution location until fewer than 600 genes
        # are significant at BH-adjusted FDR < 0.05.
        for l in np.arange(-20, 1, 0.25):
            df['p_value'] = 1 - norm.cdf(df['distance'], loc=l)
            df['FDR'] = stats.p_adjust(FloatVector(df["p_value"].tolist()),
                                       method='BH')
            if df[df['FDR'] < 0.05].shape[0] < 600:
                break

        df = df.sort_values(by=['distance'], ascending=False)
        df['rank'] = range(1, df.shape[0] + 1)
        self.prediction = df
Example #4
0
def preknown_cost(meta_df, train_df, real_table, columns, preknown_list, verbose=False):
    """Score a feature-column subset by how highly it ranks pre-known genes.

    Trains a logistic-regression predictor on *columns* of *train_df*
    (labelled via *meta_df*), pins the sign of h3k27me3 coefficients,
    scores *real_table*, and awards tier points for each gene from
    *preknown_list* according to its resulting rank (better rank, more
    points).

    Args:
        meta_df: gene metadata used by ``label_label``.
        train_df: training feature table.
        real_table: table of genes to score.
        columns: feature-column subset under evaluation.
        preknown_list: genes whose ranks contribute to the score.
        verbose: if True, print the hit table and the final score.

    Returns:
        float: accumulated tier score for this column subset.
    """
    cur_train_df = train_df[columns].copy()
    cur_train_df = label_label(cur_train_df, meta_df)
    scaler = center_normalization(cur_train_df.iloc[:, :-1])
    cur_train_df.iloc[:, :-1] = preprocessing_table(scaler, cur_train_df.iloc[:, :-1])
    predictor = predict_logisticregression(cur_train_df.iloc[:, :-1],
                                           cur_train_df.iloc[:, -1], c=.2)

    # Force h3k27me3 coefficients negative (suppressive), except
    # kurtosis-style h3k27me3 features, which are forced positive.
    for i in range(len(predictor.coef_[0])):
        if columns[i].find('h3k27me3') != -1 and columns[i].find('kurtosis') == -1:
            predictor.coef_[0][i] = -1
        elif columns[i].find('h3k27me3') != -1 and columns[i].find('kurtosis') != -1:
            predictor.coef_[0][i] = 1

    cur_real_table = real_table[columns].copy()
    cur_real_table = preprocessing_table(scaler, cur_real_table)

    result = predict_decision(predictor, cur_real_table)
    result2 = predict_proba(predictor, cur_real_table)
    result = np.concatenate([result, result2], axis=1)
    result_df = pd.DataFrame(result, index=cur_real_table.index)
    result_df.columns = ['distance', 'non-CIG_prob', 'CIG_prob']
    del result_df['non-CIG_prob']
    result_df = result_df.sort_values(by=['distance'], ascending=False)
    result_df['rank'] = range(1, result_df.shape[0] + 1)

    cur_hit_df = result_df[(result_df.index.isin(preknown_list))]
    if verbose:
        # BUG FIX: Python-2 `print` statements replaced with the function
        # form so the module parses under Python 3.
        print(cur_hit_df)

    # BUG FIX: the original tiers used strict bounds on both sides, so genes
    # ranked exactly 20, 50, 100 or 200 earned no points at all; the tiers
    # are now contiguous (upper bound inclusive).  Rank >= 500 still scores 0.
    cur_hit = cur_hit_df[cur_hit_df['rank'] <= 10].shape[0] * 15
    cur_hit += cur_hit_df[(cur_hit_df['rank'] > 10) & (cur_hit_df['rank'] <= 20)].shape[0] * 10
    cur_hit += cur_hit_df[(cur_hit_df['rank'] > 20) & (cur_hit_df['rank'] <= 50)].shape[0] * 8
    cur_hit += cur_hit_df[(cur_hit_df['rank'] > 50) & (cur_hit_df['rank'] <= 100)].shape[0] * 3
    cur_hit += cur_hit_df[(cur_hit_df['rank'] > 100) & (cur_hit_df['rank'] <= 200)].shape[0] * 1
    cur_hit += cur_hit_df[(cur_hit_df['rank'] > 200) & (cur_hit_df['rank'] < 500)].shape[0] * 0.1

    if verbose:
        print(cur_hit, columns)
    return cur_hit
        'ensemble/logits': [],
    }
}

# Evaluation loss, moved to GPU (assumes CUDA is available).
criterion = CrossEntropyLoss().cuda()

# Evaluate each of the args.n_models checkpoints under two strategies:
#   'running' -> a single forward pass, logged under 'eval/logits'
#   'sample'  -> args.n_tries passes, logged under 'ensemble/logits'
# (presumably stochastic sampling vs. running statistics — confirm against
# utils.set_strategy)
for i in range(args.n_models):
    net = utils.load_model(os.path.join(args.models_dir, str(i), 'model'))
    net.eval()
    for s, mode, n in zip(['running', 'sample'], ['eval', 'ensemble'],
                          [1, args.n_tries]):
        utils.set_strategy(net, strategy=s)

        # Known-class test set: collect per-model logits; labels are the
        # same for every model, so they are simply overwritten each pass.
        _, labels, logits = utils.predict_proba(testloader_kn,
                                                net,
                                                ensembles=n,
                                                n_classes=args.n_classes,
                                                return_logits=True)
        eval_data['known']['{}/logits'.format(mode)].append(logits)
        eval_data['known']['labels'] = labels

        # Unknown-class test set: labels are discarded, only logits kept.
        _, _, logits = utils.predict_proba(testloader_ukn,
                                           net,
                                           ensembles=n,
                                           n_classes=args.n_classes,
                                           return_logits=True)
        eval_data['unknown']['{}/logits'.format(mode)].append(logits)

# Stack per-model logits into a single array per (dataset, mode) pair and
# squeeze out singleton dimensions (e.g. when only one model was evaluated).
for d, t in product(['known', 'unknown'], ['eval', 'ensemble']):
    eval_data[d]['{}/logits'.format(t)] = np.squeeze(
        np.stack(eval_data[d]['{}/logits'.format(t)]))
Example #6
0
def model_prediction():
    """Run ensemble prediction on the images under ``Result/``.

    Loads a trained ResNet-18 checkpoint, builds a dataloader over the test
    directory with fixed per-channel normalisation constants, runs
    ``utils.predict_proba`` (the hard-coded ``running`` flag selects the
    'running' strategy branch), saves the raw result tuple to
    ``Result/res_norotate`` and returns the mean predicted probabilities.

    Returns:
        Mean of the predicted probabilities over the ensemble tries
        (``np.mean(res[0], axis=0)``).
    """
    model_path = 'networks/resnet-18_trained.t7'
    test_dir = 'Result/'
    cuda = True
    log_file = 'evalss_data'
    eval_rot = True
    eval_no_crop = True
    n_tries = 10
    seed = 42
    output_dir = 'Result'
    running = True

    print("loading model", model_path)

    # Seed every RNG so stochastic sampling is reproducible.
    torch.cuda.manual_seed_all(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    log_file = utils.uniquify(os.path.join(output_dir, log_file), sep='-')

    net = utils.load_model(model_path, cuda)

    # Load the data with fixed per-channel mean/std normalisation constants.
    # (A previous revision computed mean/std from Result/data/image.png but
    # never used them; that dead code has been removed.)
    dataloader = utils.get_dataloader(test_dir,
                                      [0.6000, 0.3946, 0.6041],
                                      [0.2124, 0.2335, 0.2360],
                                      eval_no_crop, eval_rot, batch_size=1)

    if not running:
        # 'sample' strategy: average over n_tries stochastic forward passes.
        net.eval()
        utils.set_strategy(net, 'sample')
        have_do = utils.set_do_to_train(net)

        res = utils.predict_proba(dataloader, net, n_classes=5,
                                  return_logits=True, ensembles=n_tries,
                                  cuda=cuda)
        print('Result', res)
    else:
        # 'running' strategy; use only 3 tries when the network exposes no
        # dropout layers to keep in train mode.
        net.eval()
        utils.set_strategy(net, 'running')
        have_do = utils.set_do_to_train(net)

        res = utils.predict_proba(dataloader, net, n_classes=5,
                                  return_logits=True,
                                  ensembles=n_tries if have_do else 3)

    # Mean prediction per class across the ensemble tries.
    prob_means_en = np.mean(res[0], axis=0)

    output_file_name = 'res_norotate'
    torch.save(res, output_dir + '/' + output_file_name)
    print('Created output file \'', output_file_name, ' \' ')

    torch.cuda.empty_cache()
    return (prob_means_en)
Example #7
0
# Load the checkpoint and derive a unique log-file path in its directory.
ckpt = torch.load(args.model)
log_file = utils.uniquify(os.path.join(os.path.dirname(args.model), args.log_file), sep='-')

# Test loaders for the known- and unknown-class datasets; the unknown
# loader drops the last (possibly incomplete) batch.
_, dataloader_known = utils.get_dataloader(data=args.data_kn, test_bs=args.test_bs, data_root=args.data_root)
_, dataloader_unknown = utils.get_dataloader(data=args.data_ukn, test_bs=args.test_bs,
                                                         data_root=args.data_root, drop_last_test=True)

eval_data = {}

# Prepare the network for ensembled prediction: eval mode, 'sample'
# strategy, dropout layers kept in train mode (presumably MC-dropout
# sampling — confirm against utils.set_strategy / set_do_to_train).
net = utils.load_model(args.model)
net.eval()
utils.set_strategy(net, 'sample')
have_do =  utils.set_do_to_train(net)

# predict_proba with return_logits=True yields (proba, labels, logits),
# as consumed by the res[0]/res[1]/res[2] indexing below.
res = utils.predict_proba(dataloader_unknown, net, n_classes=args.n_classes, return_logits=True, ensembles=args.n_tries)

eval_data['unknown'] = {
    'ensemble/proba': res[0],
    'ensemble/logits': res[2],
    'ensemble/labels': res[1]
}

res = utils.predict_proba(dataloader_known, net, n_classes=args.n_classes, return_logits=True, ensembles=args.n_tries)
eval_data['known'] = {
    'ensemble/proba': res[0],
    'ensemble/logits': res[2],
    'ensemble/labels': res[1]
}

net.eval()