Ejemplo n.º 1
0
def _run_hhblits(proteins, name, db_hh=None, fam_msa=None):
    """Generate a multiple sequence alignment for ``proteins`` using hhblits.

    The sequences are handed to ``run_clustalo`` together with a seed FASTA
    path, and that seed file is used as the hhblits query. The hhblits
    ``.a3m`` output is passed through ``reformat.pl`` twice (``.a3m`` ->
    ``.a2m`` -> ``_.fasta``) and the final file is renamed to ``fam_msa``.

    Args:
        proteins: Iterable of items accepted by ``_get_seq``.
        name: Base name used to resolve the hhblits working directory and to
            name the intermediate files created in it.
        db_hh: hhblits database prefix. Defaults to the UniRef30 2020_06
            location that used to be hard-coded here.
        fam_msa: Destination of the final FASTA alignment. Defaults to the
            xcl1_family MSA path that used to be hard-coded here.

    Raises:
        subprocess.CalledProcessError: If hhblits or reformat.pl exits with a
            non-zero status.
    """
    if db_hh is None:
        db_hh = '/vol/sci/bio/data/or.zuk/projects/ContactMaps/data/uniref30/UniRef30_2020_06'
    if fam_msa is None:
        fam_msa = "/vol/sci/bio/data/or.zuk/projects/ContactMaps/src/Periscope/data/families/xcl1_family/msa.fasta"

    sequences = [_get_seq(p) for p in proteins]
    target_hhblits_path = get_target_hhblits_path(name)
    check_path(target_hhblits_path)

    fname = os.path.join(target_hhblits_path, name + '_seed.fasta')
    run_clustalo(sequences, fname)

    output_hhblits = os.path.join(target_hhblits_path, name + '.a3m')
    output_reformat1 = os.path.join(target_hhblits_path, name + '.a2m')
    output_reformat2 = os.path.join(target_hhblits_path, name + '_.fasta')

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed to hhblits verbatim.
    hhblits_cmd = [
        'hhblits',
        '-i', fname,
        '-d', db_hh,
        '-n', '3',
        '-e', '1e-3',
        '-maxfilt', '10000000000',
        '-neffmax', '20',
        '-nodiff',
        '-realign_max', '10000000000',
        '-oa3m', output_hhblits,
    ]
    # check=True stops at the step that actually failed rather than letting a
    # later step trip over a missing intermediate file.
    subprocess.run(hhblits_cmd, check=True)

    subprocess.run(['reformat.pl', output_hhblits, output_reformat1],
                   check=True)
    subprocess.run(['reformat.pl', output_reformat1, output_reformat2],
                   check=True)

    os.rename(output_reformat2, fam_msa)
Ejemplo n.º 2
0
def main():
    """Compute modeller scores for the requested dataset and upload them.

    The dataset name comes from the parsed command-line arguments. The
    dataset's modeller directory under ``PATHS.models`` is passed to
    ``check_path``, ``save_modeller_scores`` is run for the dataset, and the
    directory is then handed to ``upload_folder``.
    """
    dataset = parse_args().dataset
    LOGGER.info(f'Working on {dataset}')

    local_dir = os.path.join(PATHS.models, 'modeller', dataset)
    check_path(local_dir)
    save_modeller_scores(dataset)

    # Upload destination is whatever follows the last 'Periscope/' in the
    # local path (the whole path if that marker is absent).
    remote_dir = local_dir.split('Periscope/')[-1]
    upload_folder(local_dir, remote_dir)
Ejemplo n.º 3
0
def get_target_path(target, family=None):
    """Return the data directory for ``target``.

    Args:
        target: Target identifier; also the last component of the path.
        family: Optional family name. When given, the path lives under
            ``PATHS.periscope/data/families/<family>/``; otherwise it lives
            under ``PATHS.proteins`` in a sub-folder named after
            ``target[1:3]``.

    Returns:
        The resolved path, after it has been passed to ``check_path``
        (presumably to make sure the directory exists -- confirm).
    """
    if family is None:
        # Targets without a family are bucketed by their 2nd and 3rd chars.
        resolved = os.path.join(PATHS.proteins, target[1:3], target)
    else:
        resolved = os.path.join(PATHS.periscope, 'data', 'families', family,
                                target)

    check_path(resolved)
    return resolved
Ejemplo n.º 4
0
def main():
    """Export and upload raptor properties for every target in a family MSA.

    The MSA of the ``xcl1_family`` reference target is parsed, and for each
    entry the ``raptor_properties`` attribute of a ``DataCreator`` is
    accessed, the target's property folder is copied under
    ``PATHS.periscope/property/<target>`` and that copy is uploaded.

    Processing is best-effort: a failure on one target is logged with its
    traceback and the loop moves on to the next target.
    """
    # Local import: this block cannot assume a module-level logger exists.
    import logging

    log = logging.getLogger(__name__)

    family = "xcl1_family"
    dc = DataCreator("A0A2K6CRQ6", family=family)
    msa = dc._parse_msa()
    property_path = os.path.join(PATHS.periscope, "property")
    check_path(property_path)

    for t in msa:
        try:
            # Accessed for its side effect only; the value is not used here.
            _ = DataCreator(t, family=family, train=False).raptor_properties

            pth = _get_property_path(t, family)
            dst = os.path.join(property_path, t)
            shutil.copytree(pth, dst)

            upload_folder(dst, dst.split('Periscope/')[-1])
        except Exception:
            # Keep going, but leave a trace instead of failing silently.
            log.exception("Failed to export properties for %s", t)
Ejemplo n.º 5
0
def _save_plot_matrices(model: ContactMapEstimator, predictions, family=None):
    """Pickle per-target prediction data and upload each target's folder.

    For every target key in ``predictions['logits']`` a ``data.pkl`` file is
    written to ``<model.path>/predictions/<dataset>/<target>/`` and that
    folder is passed to ``upload_folder``.

    Args:
        model: Estimator whose ``path`` attribute is the output root.
        predictions: Mapping with ``'logits'`` and ``'weights'`` entries, each
            indexed by target.
        family: Optional family name. When given it is used as the dataset
            folder name and only the prediction, weights and ground truth are
            stored; when ``None`` the dataset is looked up with
            ``get_target_dataset`` and the extra ``DataCreator`` fields are
            stored as well.
    """
    logits_by_target = predictions['logits']

    for target in logits_by_target:
        dataset = family if family is not None else get_target_dataset(target)
        if dataset is None:
            LOGGER.info(f'Problem with {target}')
            continue

        out_root = os.path.join(model.path, 'predictions', dataset)
        check_path(out_root)
        out_dir = os.path.join(out_root, target)
        check_path(out_dir)

        creator = DataCreator(target, family=family)
        # Read up front, before anything else is pulled from the creator.
        refs_contacts = creator.refs_contacts

        data = {
            'prediction': np.squeeze(logits_by_target[target]),
            'weights': np.squeeze(predictions['weights'][target]),
        }

        # Ground truth is optional: any failure to obtain it is stored as None.
        try:
            data['gt'] = creator.protein.cm
        except Exception:
            data['gt'] = None

        if family is None:
            data.update({
                'alignment': creator.templates_aln,
                'ccmpred': creator.ccmpred,
                'templates': creator.k_reference_dm_test,
                'seqs': creator.seq_refs_ss_acc,
                'beff': creator.beff,
                'refs_contacts': refs_contacts,
            })

        pkl_save(os.path.join(out_dir, 'data.pkl'), data)
        upload_folder(out_dir, out_dir.split('Periscope/')[-1])