Exemple #1
0
    def annotateFile(self, article, file):
        """Find identifiers in the file text and return them as annotation rows.

        Every row is a list of six fields:
        ``[start, end, type, id, sourceType, sourceId]``.

        * Identifiers of type band, genbank, refseq, ensembl or snp are never
          resolved and are reported as found (snp ids get an "rs" prefix),
          with the two source fields left empty.
        * Every other identifier is passed to ``geneFinder.markerToGenes``.
          If that returns a mapping with fewer than MAXGENEPERID entries, one
          "gene" row is emitted per distinct gene symbol, with the original
          marker type and id in the source fields. Otherwise the identifier
          is reported unresolved, like the types above.

        Args:
            article: unused here; part of the annotator call signature.
            file: object providing ``content`` (the text to scan) and
                ``externalId`` (used in the log message).

        Returns:
            The list of rows, or None as soon as more than MAXCOUNT rows
            have been collected (the file is then skipped and this is logged).
        """
        text = file.content
        # marker types that are reported as-is and never mapped to genes
        neverResolved = ("band", "genbank", "refseq", "ensembl", "snp")
        rows = []
        for start, end, markerType, markerId in geneFinder.findIdentifiers(
                text):
            if markerType in neverResolved:
                if markerType == "snp":
                    markerId = "rs" + markerId
                rows.append([start, end, markerType, markerId, "", ""])
            else:
                # try to resolve most other types to genes
                genes = geneFinder.markerToGenes(markerType, markerId)
                if genes is not None and len(genes) < MAXGENEPERID:
                    # NOTE(review): genes is assumed to map geneId -> symbol,
                    # as the former "geneId, geneSym = genes.items()[0]"
                    # unpacking implied. Previously only the first entry was
                    # used (repeated once per gene); now each distinct symbol
                    # produces exactly one row.
                    seenSyms = set()
                    for geneSym in genes.values():
                        if geneSym in seenSyms:
                            continue
                        seenSyms.add(geneSym)
                        rows.append([
                            start, end, "gene", geneSym, markerType, markerId
                        ])
                else:
                    # unresolvable or too ambiguous: keep the raw identifier
                    rows.append([start, end, markerType, markerId, "", ""])

            # checked after every identifier so huge files are abandoned early
            if len(rows) > MAXCOUNT:
                logging.info("%d annotations, too mant, skipping file %s" %
                             (MAXCOUNT, file.externalId))
                return None
        return rows
Exemple #2
0
 def map(self, article, file, text, resultDict):
     """Record, for every identifier found in text, the id of this file.

     resultDict is updated in place: each identifier string returned by
     geneFinder.findIdentifiers becomes a key whose value is a set of
     file ids, and file.fileId is added to that set.
     """
     # collect all matches first, then update the result mapping
     identifiers = list(geneFinder.findIdentifiers(text))
     for _start, _end, _markerType, markerId in identifiers:
         if markerId not in resultDict:
             resultDict[markerId] = set()
         resultDict[markerId].add(file.fileId)