예제 #1
0
def predict(args, model, tcrs, peps):
    """Score TCR-peptide pairs with an already-loaded model.

    Args:
        args: Options object. ``args.model_type`` selects the pipeline
            (``'ae'`` or ``'lstm'``) and ``args.device`` is forwarded to
            that pipeline's ``predict`` helper.
        model: Loaded classifier matching ``args.model_type``.
        tcrs: TCR sequences. Passed to the batching helpers as-is.
        peps: Peptide sequences, one per entry of ``tcrs``.

    Returns:
        A ``(tcrs, peps, preds)`` tuple. The first two items are copies of
        the inputs taken before any helper ran; ``preds`` is whatever the
        selected pipeline's ``predict`` helper returned.

    Raises:
        ValueError: If ``tcrs`` and ``peps`` differ in length, or if
            ``args.model_type`` is neither ``'ae'`` nor ``'lstm'``.
    """
    # Explicit raise instead of `assert`, which is stripped under `python -O`.
    if len(tcrs) != len(peps):
        raise ValueError(
            'tcrs and peps must have the same length '
            '(got {} and {})'.format(len(tcrs), len(peps)))
    # Reject unknown model types up front; otherwise no branch below would
    # bind `preds` and the function would die with UnboundLocalError.
    if args.model_type not in ('ae', 'lstm'):
        raise ValueError(
            "unsupported model_type {!r}; expected 'ae' or 'lstm'".format(
                args.model_type))

    # Snapshot the inputs before the helpers see them.
    # NOTE(review): lstm.convert_data's return value is ignored, so it
    # presumably rewrites the lists in place -- confirm against the helper.
    tcrs_copy = tcrs.copy()
    peps_copy = peps.copy()
    dummy_signs = [0.0] * len(tcrs)

    amino_acids = list('ARNDCEQGHILKMFPSTWYV')
    batch_size = 50

    if args.model_type == 'ae':
        # The autoencoder pipeline uses separate vocabularies: peptides get
        # a leading 'PAD' entry, TCRs get a trailing 'X' entry.
        max_len = 28
        pep_atox = {
            amino: index
            for index, amino in enumerate(['PAD'] + amino_acids)
        }
        tcr_atox = {
            amino: index
            for index, amino in enumerate(amino_acids + ['X'])
        }
        test_batches = ae.get_full_batches(tcrs, peps, dummy_signs, tcr_atox,
                                           pep_atox, batch_size, max_len)
        preds = ae.predict(model, test_batches, args.device)
    else:
        # The LSTM pipeline shares one vocabulary with 'PAD' at index 0.
        amino_to_ix = {
            amino: index
            for index, amino in enumerate(['PAD'] + amino_acids)
        }
        lstm.convert_data(tcrs, peps, amino_to_ix)
        test_batches = lstm.get_full_batches(tcrs, peps, dummy_signs,
                                             batch_size, amino_to_ix)
        preds = lstm.predict(model, test_batches, args.device)

    return tcrs_copy, peps_copy, preds
예제 #2
0
파일: ERGO.py 프로젝트: yunchen-yang/ERGO
def predict(args):
    """Load a trained classifier and print a score for each pair in a CSV.

    Args:
        args: Options object. Attributes read:
            ``model_type`` -- ``'ae'`` or ``'lstm'``.
            ``ae_file``, ``model_file``, ``test_data_file`` -- paths; the
            value ``'auto'`` is replaced **in place on args** by a default.
            ``dataset``, ``sampling``, ``protein`` -- used only to build
            the automatic model file name.
            ``device`` -- passed to the model, ``torch.load`` and the
            pipeline's ``predict`` helper.

    Returns:
        None. One tab-separated ``tcr<TAB>pep<TAB>pred`` line is printed
        per retained input pair.

    Raises:
        ValueError: If ``args.model_type`` is neither ``'ae'`` nor
            ``'lstm'`` (raised before any file is touched or ``args`` is
            modified), or if a non-empty CSV row does not have exactly two
            fields.
    """
    # Fail fast: without this an unknown model type reads the data file and
    # then crashes on an unbound `preds` in the print loop.
    if args.model_type not in ('ae', 'lstm'):
        raise ValueError(
            "unsupported model_type {!r}; expected 'ae' or 'lstm'".format(
                args.model_type))
    is_ae = args.model_type == 'ae'

    # Resolve 'auto' placeholders to the default locations.
    if args.ae_file == 'auto':
        args.ae_file = 'TCR_Autoencoder/tcr_autoencoder.pt'
    if args.model_file == 'auto':
        model_dir = 'models'  # was `dir`, which shadowed the builtin
        p_key = 'protein' if args.protein else ''
        # An empty p_key leaves a double underscore in the file name; kept
        # as-is so the resolved path is unchanged.
        args.model_file = model_dir + '/' + '_'.join(
            [args.model_type, args.dataset, args.sampling, p_key, 'model.pt'])
    if args.test_data_file == 'auto':
        args.test_data_file = 'pairs_example.csv'

    # Read test data: one "tcr,pep" pair per row.
    max_len = 28
    tcrs = []
    peps = []
    # newline='' lets the csv module do its own newline handling.
    with open(args.test_data_file, 'r', newline='') as csv_file:
        for row in csv.reader(csv_file):
            if not row:
                # Blank line: nothing to unpack, skip it.
                continue
            tcr, pep = row
            # The autoencoder path drops TCRs of max_len characters or more.
            if is_ae and len(tcr) >= max_len:
                continue
            tcrs.append(tcr)
            peps.append(pep)
    signs = [0.0] * len(tcrs)  # placeholder labels for the batching helpers
    # Keep the raw strings for printing.
    # NOTE(review): lstm.convert_data's return value is ignored, so it
    # presumably rewrites the lists in place -- confirm against the helper.
    tcrs_copy = tcrs.copy()
    peps_copy = peps.copy()

    # Load model
    device = args.device
    if is_ae:
        # NOTE(review): the 28 and 50 here presumably mirror max_len and
        # batch_size -- confirm against the class signature.
        model = AutoencoderLSTMClassifier(10, device, 28, 21, 30, 50,
                                          args.ae_file, False)
    else:
        model = DoubleLSTMClassifier(10, 30, 0.1, device)
    checkpoint = torch.load(args.model_file, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)
    model.eval()

    # Predict
    amino_acids = list('ARNDCEQGHILKMFPSTWYV')
    batch_size = 50
    if is_ae:
        pep_atox = {
            amino: index
            for index, amino in enumerate(['PAD'] + amino_acids)
        }
        tcr_atox = {
            amino: index
            for index, amino in enumerate(amino_acids + ['X'])
        }
        test_batches = ae.get_full_batches(tcrs, peps, signs, tcr_atox,
                                           pep_atox, batch_size, max_len)
        preds = ae.predict(model, test_batches, device)
    else:
        amino_to_ix = {
            amino: index
            for index, amino in enumerate(['PAD'] + amino_acids)
        }
        lstm.convert_data(tcrs, peps, amino_to_ix)
        test_batches = lstm.get_full_batches(tcrs, peps, signs, batch_size,
                                             amino_to_ix)
        preds = lstm.predict(model, test_batches, device)

    # Print predictions
    for tcr, pep, pred in zip(tcrs_copy, peps_copy, preds):
        print('\t'.join([tcr, pep, str(pred)]))