예제 #1
0
    def prepare_run(self, log, info):
        """Assemble the NetMHCcons commands for the distinct peptides of a run.

        Loads the tab-separated table named by ``info['PEPCSV']`` into
        ``self.df``, keeps the first row for every distinct ``search_hit``
        value, and passes those values to ``pso.prepare_for_NetMhcCons``
        together with the work directory, the allele list (after
        ``pso.fixallele``) and the executable from ``info['NETMHCCONS']``.

        :param log: logger handed in by the caller; not used here.
        :param info: mapping providing ``Keys.WORKDIR``, ``'PEPCSV'``,
            ``'ALLELE_LIST'`` and ``'NETMHCCONS'``.
        :return: ``(info, commands)`` where ``commands`` is the second item
            returned by ``pso.prepare_for_NetMhcCons``; the first item is
            stored on ``self.outfiles``.
        """
        # Configuration lookups are kept in their original order so that a
        # missing key surfaces exactly as before.
        work_directory = info[Keys.WORKDIR]
        peptide_table_path = info['PEPCSV']
        allele_list = pso.fixallele(info['ALLELE_LIST'])
        executable = info['NETMHCCONS']

        self.df = pd.read_csv(peptide_table_path, sep="\t", header=0)

        # First occurrence of every search hit, in file order.
        peptides = self.df.drop_duplicates(subset=['search_hit'])['search_hit'].tolist()

        prepared = pso.prepare_for_NetMhcCons(work_directory, peptides, allele_list, executable)
        self.outfiles, commands = prepared
        return info, commands
예제 #2
0
    def prepare_run(self, log, info):
        """Build the NetMHCcons commands for every sample in the iProphet output.

        Reads the tab-separated table at ``info['PEPCSV']`` and the
        comma-separated table at ``info['DB_PATH']``, inner-joins them on a
        ``fileid`` column derived from ``spectrum`` (text before the first
        ``.``) and ``FileName`` (name without its extension), and then calls
        ``pso.prepare_for_NetMhcCons`` once per distinct ``SampleID`` inside
        a per-sample subdirectory of the work directory.

        Side effects: sets ``self.db``, ``self.iprophet``, ``self.mergefiles``,
        ``self.outfiles`` and ``self.commands``, and creates the per-sample
        directories on disk.

        :param log: logger; receives a warning when a sample does not map to
            exactly one ``MHCAllele`` value, and a debug line per directory.
        :param info: mapping providing ``Keys.WORKDIR``, ``'PEPCSV'``,
            ``'NETMHCCONS'`` and ``'DB_PATH'``.
        :return: ``(info, self.commands)`` with the commands of all samples
            concatenated in order of first appearance of each ``SampleID``.
        """
        workdir = info[Keys.WORKDIR]
        iprophoutput = info['PEPCSV']

        exe = info['NETMHCCONS']
        dbpath = info['DB_PATH']
        self.db = pd.read_csv(dbpath, sep=",", header=0)
        self.iprophet = pd.read_csv(iprophoutput, sep="\t", header=0)

        # Real lists instead of map(): on Python 3 map() yields a lazy
        # iterator without a length, which is not a safe column value.
        self.iprophet['fileid'] = [
            spectrum.split(".")[0] for spectrum in self.iprophet['spectrum']
        ]
        self.db['fileid'] = [
            os.path.splitext(filename)[0] for filename in self.db['FileName']
        ]

        self.mergefiles = self.iprophet.merge(
            self.db, left_on='fileid', right_on='fileid', how='inner')

        self.outfiles = []
        self.commands = []

        for sampleID in self.mergefiles['SampleID'].unique():
            allSet = self.mergefiles[self.mergefiles['SampleID'] == sampleID]
            alleles = allSet['MHCAllele'].unique().tolist()
            if len(alleles) != 1:
                log.warning("there is more than one allele : {}".format('-'.join(alleles)))
            pepseq = allSet['search_hit'].unique().tolist()
            # Only the first MHCAllele entry is used; it may itself hold a
            # comma-separated list of alleles.
            alleles = alleles[0].split(',')

            # str(): read_csv may parse SampleID as a number, and
            # os.path.join only accepts path strings.
            tmpdir = os.path.join(workdir, str(sampleID))
            if not os.path.exists(tmpdir):
                os.makedirs(tmpdir)
            # Was a Python 2 ``print`` statement (a SyntaxError on Python 3).
            log.debug("sample working directory: {}".format(tmpdir))

            outfiles, commands = pso.prepare_for_NetMhcCons(tmpdir, pepseq, alleles, exe)
            self.outfiles += outfiles
            self.commands += commands
        return info, self.commands