Python UtilMethods.getSpecies Examples

Programming Language: Python

Namespace/Package Name: utils

Class/Type: UtilMethods

Method/Function: getSpecies

Examples at hotexamples.com: 3

Python UtilMethods.getSpecies - 3 examples found. These are the top-rated real-world Python examples of utils.UtilMethods.getSpecies, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

listFilesExt(13)

normalizePath(11)

loadConfig(8)

getSpecies(3)

getLabel(3)

foldClusterData(2)

getFileName(2)

charGramShuffle(1)

getFileExt(1)

getSparkConf(1)

makeDefaultName(1)

Example #1

Show file

    def getDomains(self, sparkContext):
        """Predict domains for the source sequences through a Spark pipeline.

        Stores the species name derived from ``self.source_path`` on
        ``self.species``, parses the source according to
        ``self.source_type``, distributes the parsed file content as an RDD
        (2000 slices) and maps the domain finder and
        ``self.processDomainOutput`` over it. When the source type contains
        "nucleotide", each element is first passed through
        ``SixFrameTranslator.main``.

        Returns whatever ``collectAsMap()`` yields for the processed RDD.
        """
        # the species name is reused for naming temp files
        self.species = Utils.getSpecies(self.source_path)
        finder = DomainFinder.DomainFinder()

        # parse the source sequences
        # NOTE(review): `file_content` is only bound in the "fasta" branch;
        # any other source type reaches parallelize() with it unbound and
        # fails there - confirm what the genbank path should distribute.
        if "fasta" in self.source_type:
            sequences, file_content = Parsers.parseFastaToList(
                self.source_path, "")
        elif "genbank" in self.source_type:
            sequences = Parsers.genBankToAminoacid(self.source_path)

        print('Processing domains...')

        # distribute the source content
        rdd = sparkContext.parallelize(file_content, numSlices=2000)

        # nucleotide input goes through six-frame translation first
        if "nucleotide" in self.source_type:
            rdd = rdd.map(lambda item: SixFrameTranslator.main(item))

        # domain prediction, then post-processing of each prediction
        predictedRDD = rdd.map(lambda pair: finder.main(pair[0], pair[1]))
        outputRDD = predictedRDD.map(
            lambda pair: self.processDomainOutput(pair[0], pair[1]))

        # pull the processed predictions back to the driver
        domains = outputRDD.collectAsMap()

        print('Done!')

        return domains

Example #2

Show file

def _firstQualifier(qualifiers, key):
    # Return the first value stored under `key`, or '' when the qualifier
    # is absent or has no values.
    values = qualifiers.get(key)
    return values[0] if values else ''


def genBankToAminoacid(path):
    """Collect the protein translations of all CDS features in GenBank files.

    ``path`` is either a single file or a directory; for a directory the
    files are obtained from ``Utils.listFilesExt(path, 'gbk')``.

    Every CDS feature produces one entry made of a header line
    ``>locus|species|id|description`` followed by a newline and the
    translation. ``id`` is the first of locus_tag, gene and protein_id that
    is longer than one character. ``description`` is the product, with
    ``|function`` appended only when a function qualifier is present.

    Returns:
        A tuple ``(entries, translations)``: the list of unique entries in
        order of first appearance, and the translations of those entries
        concatenated into a single string.
    """
    if os.path.isfile(path):
        files = [path]
    else:
        files = Utils.listFilesExt(path, 'gbk')

    entries = []
    # set mirror of `entries` so the duplicate check is O(1) per entry
    seen = set()
    translationParts = []

    for gbkFile in files:
        species = Utils.getSpecies(gbkFile)

        for record in parseGenBank(gbkFile):
            locus = record.id
            for feature in record.features:
                if feature.type != "CDS":
                    continue

                qualifiers = feature.qualifiers
                translation = _firstQualifier(qualifiers, "translation")
                gene = _firstQualifier(qualifiers, "gene")
                locusTag = _firstQualifier(qualifiers, "locus_tag")
                proteinId = _firstQualifier(
                    qualifiers, "protein_id").replace('/', '')
                product = _firstQualifier(qualifiers, "product")
                function = _firstQualifier(qualifiers, "function")

                # priority for the entry ID: locus_tag, gene, protein_id
                entryId = next(
                    (candidate
                     for candidate in (locusTag, gene, proteinId)
                     if len(candidate) > 1),
                    '')

                description = product if product.strip() else ''
                # Append the function only when there is one. The previous
                # `description += a if cond else description` form doubled
                # the description whenever the function was empty.
                if function.strip():
                    description += '|' + function

                entry = ('>' + locus + '|' + species + '|' + entryId + '|'
                         + description + '\n' + translation)
                if entry not in seen:
                    seen.add(entry)
                    entries.append(entry)
                    translationParts.append(translation)

    return entries, ''.join(translationParts)

Example #3

Show file

 def __init__(self, source_type=None, source_path=None, result_path=None):
     """Set up the pipeline from explicit arguments or the loaded config.

     Each of ``source_type``, ``source_path`` and ``result_path`` falls
     back to the matching ``dataPipeline`` config option when it is None.
     The directory part of the normalized result path and a ``temp``
     directory next to this module are created if missing.
     """
     self.config = Utils.loadConfig()
     self.task = self.config.get('dataPipeline', 'task')

     # explicit arguments win; the config is only consulted when needed
     if source_path is None:
         source_path = self.config.get('dataPipeline', 'source.path')
     self.source_path = source_path

     if source_type is None:
         source_type = self.config.get('dataPipeline', 'source.type')
     self.source_type = source_type

     if result_path is None:
         result_path = self.config.get('dataPipeline', 'result.path')
     self.result_path = Utils.normalizePath(result_path)

     # make sure the result directory exists
     resultDir = os.path.dirname(self.result_path)
     os.makedirs(resultDir, exist_ok=True)

     # the species name is reused for naming temp files
     self.species = Utils.getSpecies(self.source_path)

     # temp dir used by sub-pipelines, located beside this module
     moduleDir = os.path.dirname(os.path.realpath(__file__))
     self.path = moduleDir + '/temp/'
     os.makedirs(os.path.dirname(self.path), exist_ok=True)