def make_binaries (article, L, R, threshold=8000):
    """Build a KinaseBinary for every (L, R) pair that is close enough in the document.

    Every element of ``L`` is paired with every element of ``R``. A pair is
    kept only when the absolute difference of the two ``docPos`` values is
    strictly less than ``threshold``.

    Side effect: when a pair is kept and ``article.date`` contains ``"--"``,
    ``article.date`` is overwritten with the placeholder ``"0-0-9000"`` before
    it is parsed.

    Args:
        article: object providing ``id``, ``date`` and ``fileName``.
        L: iterable of items exposing ``word``, ``docPos``, ``paraPos``, ``sentPos``.
        R: iterable of items with the same attributes as those in ``L``.
        threshold: exclusive upper bound on the document distance of a pair.

    Returns:
        List of the KinaseBinary objects created, in L-major iteration order.
    """
    logger = LoggingUtil.init_logging (__file__)
    binaries = []
    for left in L:
        for right in R:
            doc_gap = abs (left.docPos - right.docPos)
            logger.debug ("Distance - {0}".format (doc_gap))
            # Written as "not <" so the test is the exact negation of the
            # acceptance condition.
            if not doc_gap < threshold:
                continue
            # Swap in a parseable placeholder for dates containing "--".
            if "--" in article.date:
                article.date = "0-0-9000"
            para_gap = abs (left.paraPos - right.paraPos)
            sent_gap = abs (left.sentPos - right.sentPos)
            pair = KinaseBinary (
                id = 0,
                L = left.word,
                R = right.word,
                docDist = doc_gap,
                paraDist = para_gap,
                sentDist = sent_gap,
                code = 0,
                fact = False,
                refs = [],
                pmid = article.id,
                date = SerUtil.parse_date (article.date),
                file_name = article.fileName)
            logger.info ("Binary: {0}".format (pair))
            binaries.append (pair)
    return binaries
def find_before (pmid_date, facts):
    """Select binaries whose own date precedes the date joined to their reference pmid.

    Both arguments are used through ``map`` / ``union`` / ``join`` /
    ``filter`` / ``distinct`` -- presumably Spark pair RDDs; confirm against
    the caller.

    Args:
        pmid_date: collection of ``(pmid, date)`` pairs.
        facts: collection of records where ``r[1][0]`` is the binary and
            ``r[1][1]`` is an object carrying the reference ``pmid``.

    Returns:
        The distinct binaries, each copied with ``ref_date`` set, for which
        ``date`` and ``ref_date`` are both truthy and ``date < ref_date``.
    """
    logger.info ("Join facts with the pmid->date map to find interactions noticed before published discovery.")

    # ( intact.REF[pmid] -> KinaseBinary )
    ref_pmid_to_binary = facts.map (lambda rec : ( rec[1][1].pmid, rec[1][0] ))

    # TEST. Add reference pmids with late dates.
    # NOTE(review): this gives every reference pmid an extra far-future date,
    # which looks like test scaffolding -- confirm it is still wanted.
    late_dates = ref_pmid_to_binary.map (lambda pair : ( pair[0], SerUtil.parse_date ("1-1-2300") ))
    pmid_date = pmid_date.union (late_dates)

    # After the join each value is indexed as (binary, reference date).
    joined = ref_pmid_to_binary.join (pmid_date)
    with_ref_date = joined.map (lambda pair : pair[1][0].copy (ref_date = pair[1][1]))
    earlier = with_ref_date.filter (lambda k : k.date and k.ref_date and k.date < k.ref_date)
    return earlier.distinct ()
def get_article_guesses (article):
    """Convert an article's guesses into ``(key, distance)`` pairs.

    The guesses in ``article.AB``, ``article.BC``, ``article.AC`` and
    ``article.BB`` are concatenated in that order. A guess is dropped when
    either its ``L`` or its ``R`` term is in the skip list. Each remaining
    guess is mutated in place: ``pmid`` is set to ``article.id`` and, when
    ``article.date`` parses to a truthy value, ``date`` is set to the
    corresponding ``calendar.timegm`` timestamp.

    A failure while parsing or converting the date is reported on stdout with
    a traceback and the guess is still emitted, leaving its ``date`` untouched.

    Args:
        article: object providing ``AB``, ``BC``, ``AC``, ``BB`` (lists of
            guesses), ``id``, ``date`` and ``fileName``.

    Returns:
        List of ``(make_key (g.L, g.R, g.pmid), Guesses.distance (g))`` tuples,
        one per guess that was not skipped.
    """
    guesses = article.AB + article.BC + article.AC + article.BB
    skiplist = ( 'for', 'was', 'she', 'long' )
    result = []
    for g in guesses:
        if g.L in skiplist or g.R in skiplist:
            continue
        g.pmid = article.id
        try:
            date = SUtil.parse_date (article.date)
            if date:
                g.date = calendar.timegm (date.timetuple ())
        except Exception:
            # Best effort: a bad date must not drop the guess. Catching
            # Exception rather than using a bare except lets
            # KeyboardInterrupt and SystemExit propagate.
            print ("No date parsed in {0} {1}".format (article.fileName, article.date))
            traceback.print_exc ()
        result.append ( ( make_key (g.L, g.R, g.pmid), Guesses.distance (g) ) )
    return result