Exemple #1
0
    def test_sequence_guess_biotype_1(self):
        """Check that ``sequence.guess_biotype`` agrees with ``guess_type``.

        The module-level ``ces.guess_type`` result for ``_SEQUENCE_3`` is the
        reference value; a ``ces.sequence`` built from the same string must
        report the same type through its ``guess_biotype`` method.
        """
        # Reference value, computed directly from the raw sequence string.
        reference = ces.guess_type(_SEQUENCE_3)

        # Same sequence wrapped in a sequence object with two chains.
        sequence_kwargs = {
            'seqid': 1,
            'oligomer': '1IXY',
            'chains': {'C', 'D'},
            'seq': _SEQUENCE_3,
        }
        wrapped = ces.sequence(**sequence_kwargs)

        self.assertEqual(reference, wrapped.guess_biotype())
Exemple #2
0
def main():
    """Run the full (non-cropped) contact-prediction / interface pipeline.

    Steps, in order:

    1. Parse command-line arguments and initialise the module logger.
    2. Resolve the input sequence file, the interface (dimer) structure
       files, the output root directory and the collection CSV path.
    3. Parse the sequence file, which must describe exactly one entry.
    4. Unless ``--skip_conpred`` was given: renumber the interface files,
       dump one FASTA per sequence, run HHblits and DeepMetaPSICOV.
    5. Read the contact predictions and the renumbered structures, build a
       contact atlas per interface and prediction source, and append one
       result line per usable interface to both the per-entry CSV file and
       the collection CSV file.

    Raises:
        Exception: if the parsed sequence file does not contain exactly one
            entry.
    """
    parser = create_argument_parser()
    args = parser.parse_args()

    global logger
    logger = pcl.pisacov_logger(level="info")
    # ``starttime`` is consumed by ``pcl.ok`` at the very end of the run.
    welcomemsg, starttime = pcl.welcome(command=__script__)
    logger.info(welcomemsg)

    # PARSE CONFIGURATION FILE:
    invals = pco._initialise_inputs()

    invals['INSEQ'] = None
    invals['INIFS'] = None
    invals['OUTROOT'] = None
    invals['OUTCSVPATH'] = None

    # READ INPUT ARGUMENTS
    # This must be an assignment: a comparison here would leave INSEQ as None
    # and every later use of the sequence path would receive None.
    invals['INSEQ'] = ppaths.check_path(args.seqpath[0], 'file')

    invals['INIFS'] = []
    args.remove_insertions = False
    for fp in args.dimers:
        if '*' in fp:
            invals['INIFS'] += ppaths.check_wildcard(fp)
        else:
            invals['INIFS'].append(ppaths.check_path(fp, 'file'))
    # Drop duplicated paths while keeping the order in which they were given.
    invals['INIFS'] = list(dict.fromkeys(invals['INIFS']))

    if args.hhblits_arguments is not None:
        invals['HHBLITS_PARAMETERS'] = pco._check_hhparams(
            args.hhblits_arguments)

    skipexec = args.skip_conpred is True
    if skipexec and args.hhblits_arguments is not None:
        logger.info('HHblits parameters given bypassed by --skip_conpred')

    if args.outdir is None:
        # Default output root: the directory holding the input sequence file.
        invals['OUTROOT'] = ppaths.check_path(os.path.dirname(invals['INSEQ']))
    else:
        invals['OUTROOT'] = ppaths.check_path(os.path.join(args.outdir[0], ''))
    ppaths.mdir(invals['OUTROOT'])

    if args.collection_file is None:
        invals['OUTCSVPATH'] = ppaths.check_path(
            os.path.join(
                invals['OUTROOT'],
                os.extsep.join(["evcovsignal", "full", "pisacov", "csv"])))
    else:
        invals['OUTCSVPATH'] = ppaths.check_path(args.collection_file[0])

    # Only write the header when the collection file does not exist yet, so
    # that successive runs keep appending to the same file.
    if not os.path.isfile(invals['OUTCSVPATH']):
        pic.csvheader(invals['OUTCSVPATH'], cropped=False)

    # Define formats used
    sources = pco._sources()

    # Parse sequence and structure files
    logger.info('Parsing sequence file...')
    seqs = cps.parseseqfile(invals['INSEQ'])

    if len(seqs) != 1:
        raise Exception('More than one pdbid in sequence set.')
    # Look the entry up with the key actually found; the lower-cased form is
    # only used to build names and paths.
    seqkey = next(iter(seqs))
    pdbid = seqkey.lower()
    seq = seqs[seqkey]

    outpdbdir = os.path.join(invals['OUTROOT'], pdbid, "")

    # RENUMBERING
    fseq = {}
    fmsa = {}

    if not skipexec:
        # Create the per-entry directory before anything is copied into it.
        ppaths.mdir(outpdbdir)

        if invals['INIFS'] is not None:
            logger.info('Renumbering interfaces provided ' +
                        'according to position in sequence.')
            for path in invals['INIFS']:
                instrc = os.path.join(invals['OUTROOT'], pdbid,
                                      os.path.basename(path))
                logger.info(pcl.running('CROPS-renumber'))
                itime = datetime.datetime.now()
                psc.renumcrops(invals['INSEQ'], path, invals['OUTROOT'])
                copyfile(path, instrc)
                logger.info(pcl.running('CROPS-renumber', done=itime))

    # One FASTA path and one MSA path per sequence of the entry.
    for i, iseq in seq.imer.items():
        fiseq = pdbid + '_' + i + os.extsep + 'fasta'
        fseq[i] = os.path.join(invals['OUTROOT'], pdbid, fiseq)
        fiseq = pdbid + '_' + i + os.extsep + 'msa' + os.extsep + 'aln'
        fmsa[i] = os.path.join(invals['OUTROOT'], pdbid, 'hhblits', fiseq)
        if not skipexec:
            iseq.dump(fseq[i])

    # EXECUTION OF EXTERNAL PROGRAMS
    hhdir = os.path.join(invals['OUTROOT'], pdbid, 'hhblits', '')
    dmpdir = os.path.join(invals['OUTROOT'], pdbid, 'dmp', '')
    # Paths at which the renumbered structures are expected, one per input
    # interface: <OUTROOT>/<input stem>.crops.seq.pdb
    fstr = [
        os.path.join(
            invals['OUTROOT'],
            (os.path.splitext(os.path.basename(ifpath))[0] + os.extsep +
             'crops' + os.extsep + 'seq' + os.extsep + 'pdb'))
        for ifpath in invals['INIFS']
    ]

    if not skipexec:
        # MSA GENERATOR
        ppaths.mdir(hhdir)

        if invals['HHBLITS_PARAMETERS'] == ['3', '0.001', 'inf', '50', '99']:
            logger.info(
                'Generating Multiple Sequence Alignment using DeepMetaPSICOV default parameters... [AS RECOMMENDED]'
            )
        elif invals['HHBLITS_PARAMETERS'] == ['2', '0.001', '1000', '0', '90']:
            logger.info(
                'Generating Multiple Sequence Alignment using HHBlits default parameters...'
            )
        else:
            logger.info(
                'Generating Multiple Sequence Alignment using user-custom parameters...'
            )

        for i, iseq in seq.imer.items():
            logger.info(pcl.running('HHBlits'))
            itime = datetime.datetime.now()
            themsa = psm.runhhblits(fseq[i], invals['HHBLITS_PARAMETERS'],
                                    hhdir)
            logger.info(pcl.running('HHBlits', done=itime))
            iseq.msa = themsa

        # DEEP META PSICOV RUN
        logger.info(
            'Generating contact prediction lists via DeepMetaPSICOV...')

        ppaths.mdir(dmpdir)
        for i, iseq in seq.imer.items():
            sfile = fseq[i]
            afile = fmsa[i]
            # The FASTA file is copied into the DeepMetaPSICOV directory and
            # the copy is the one handed to ``psd.rundmp``.
            nsfile = os.path.join(dmpdir, os.path.basename(sfile))
            if sfile != nsfile:
                copyfile(sfile, nsfile)
            logger.info(pcl.running('DeepMetaPSICOV'))
            itime = datetime.datetime.now()
            psd.rundmp(nsfile, afile, dmpdir)
            logger.info(pcl.running('DeepMetaPSICOV', done=itime))

    # GENERATE INTERFACE LIST
    iflist = [
        pci.interface(name=os.path.splitext(os.path.basename(filepath))[0])
        for filepath in fstr
    ]

    # CONTACT ANALYSIS AND MATCH
    logger.info('Opening output csv files...')
    resultdir = os.path.join(invals['OUTROOT'], pdbid, 'pisacov', '')
    ppaths.mdir(resultdir)
    csvfile = os.path.join(
        resultdir,
        os.extsep.join([pdbid, "evcovsignal", "full", "pisacov", "csv"]))

    pic.csvheader(csvfile, cropped=False, pisascore=False)

    logger.info('Parsing sequence files...')
    for i, fpath in fseq.items():
        seq.imer[i].seqs['conkit'] = ckio.read(fpath, 'fasta')[0]
        seq.imer[i].biotype = csq.guess_type(seq.imer[i].seqs['mainseq'])

    logger.info('Parsing contact predictions lists...')
    conpred = {}
    matches = []
    for s in seq.imer:
        if s not in conpred:
            conpred[s] = {}
        for source, attribs in sources.items():
            # attribs is indexed as (subdirectory, file suffix, format name).
            fc = os.path.splitext(os.path.basename(fseq[s]))[0]
            fc += attribs[1]
            confile = os.path.join(invals['OUTROOT'], pdbid, attribs[0], fc)
            conpred[s][source] = ckio.read(confile, attribs[2])[0]

    logger.info('Parsing crystal structure contacts...')
    # ``matches`` is kept index-aligned with ``iflist``: every interface
    # appends exactly one item, ``None`` meaning "skipped".
    for i, iface in enumerate(iflist):
        inputmap = ckio.read(fstr[i], 'pdb')
        # NOTE(review): only inputs yielding exactly 4 maps are processed;
        # presumably 2 intra-chain + 2 inter-chain maps of a dimer - confirm.
        if len(inputmap) != 4:
            iface.structure = None
            matches.append(None)
            continue

        chnames = list(iface.chains.keys())
        chtypes = list(iface.chains.values())
        bothprotein = chtypes[0] == 'Protein' and chtypes[1] == 'Protein'
        if (seq.whatseq(chnames[0]) != seq.whatseq(chnames[1])
                or not bothprotein):
            if not bothprotein:
                logger.info(
                    'Interface ' + str(i) +
                    ' is not a Protein-Protein interface. Ignoring.')
            else:
                logger.info('Interface ' + str(i) +
                            ' is not a homodimer. Ignoring.')
            iface.structure = None
            matches.append(None)
            continue

        s = seq.whatseq(chnames[0])
        try:
            structure = []
            for m in range(len(inputmap)):
                cmap = inputmap[m].as_contactmap()
                cmap.id = inputmap[m].id
                structure.append(cmap)
        except Exception:
            # ConKit LEGACY: fall back to the raw maps. The list is rebuilt
            # from scratch so that maps converted before the failure are not
            # kept alongside the raw ones.
            structure = [inputmap[m] for m in range(len(inputmap))]
        iface.structure = structure

        matches.append({})
        for source in sources:
            matches[i][source] = pcc.contact_atlas(
                name=pdbid + '_' + str(s),
                conpredmap=conpred[s][source],
                strmap=iface.structure,
                sequence=seq.imer[s],
                removeintra=True)

    logger.info('Computing results and writing them to file...')
    for i, match in enumerate(matches):
        if match is None:
            continue
        # Chain names are read from the 'psicov' atlas of this interface.
        results = [pdbid, str(i + 1)]
        results.append(match['psicov'].chain1)
        results.append(match['psicov'].chain2)
        sid = seq.whatseq(match['psicov'].chain1)
        results.append(str(sid))
        results.append(str(seq.imer[sid].length()))
        results.append(str(seq.imer[sid].cropmsa.meff))
        results.append(str(seq.imer[sid].ncrops()))
        results.append(str(seq.imer[sid].full_length()))
        for source in sources:
            results += pcs.list_scores(match[source], tag=source)

        # Each line goes to both the per-entry file and the collection file.
        pic.lineout(results, csvfile)
        pic.lineout(results, invals['OUTCSVPATH'])

    endmsg = pcl.ok(starttime, command=__script__)
    logger.info(endmsg)
Exemple #3
0
    def test_guess_type_4(self):
        """``guess_type`` must classify ``_SEQUENCE_4`` as ``"RNA"``."""
        self.assertEqual(ces.guess_type(_SEQUENCE_4), "RNA")
Exemple #4
0
    def test_guess_type_2(self):
        """``guess_type`` must classify ``_SEQUENCE_2`` as ``"Protein"``."""
        self.assertEqual(ces.guess_type(_SEQUENCE_2), "Protein")