def parse():
    """Import GO terms from ``resources/go.obo`` into the Term collection,
    then resolve each term's ``is_a:`` references into ancestor objects.

    Side effects: reads the OBO file, saves/updates Term documents, and
    prints progress dots to stdout.
    """
    print("Start to import GO terms")
    count = 0
    name, definition, oid, namespace, tree_number_list, synonyms = None, None, None, None, [], []

    def save_current():
        """Persist the stanza accumulated so far, if it is a GO term.

        Non-GO stanzas (e.g. [Typedef] ids such as ``part_of``) are skipped
        because their oid does not start with "GO".
        """
        nonlocal count
        if oid and oid.startswith("GO"):
            term = Term(name=name, definition=definition, oid=oid,
                        namespace=namespace, tree_number_list=tree_number_list,
                        synonyms=synonyms, source="GO")
            term.save()
            count += 1
            if count % 100 == 0:
                print(".", end='', flush=True)

    with open("resources/go.obo", 'r') as f:
        for line in f:
            # A new stanza header or a blank line terminates the current term.
            if line == "[Term]\n" or line == "\n":
                save_current()
                name, definition, oid, namespace, tree_number_list, synonyms = None, None, None, None, [], []
            elif line.startswith('name:'):
                name = line.strip()[6:]
            elif line.startswith('id:'):
                oid = line.strip()[4:]
            elif line.startswith('def:'):
                # Definition is the first double-quoted section of the line.
                definition = line.split('"')[1]
            elif line.startswith('namespace:'):
                namespace = line.strip()[11:]
            elif line.startswith('is_a:'):
                # Keep only the 10-char GO identifier (e.g. "GO:0008150").
                tree_number_list.append(line.strip()[6:16])
            elif line.startswith('synonym:'):
                synonyms.append(line.split('"')[1])
    # Fix: flush the final stanza — previously a file without a trailing
    # blank line silently dropped its last term.
    save_current()

    print("\nFinished importing GO terms")
    print("Start to fetch ancestor objects")
    count = 0
    for term in Term.objects(source="GO"):
        for number in term.tree_number_list:
            try:
                # Look up the parent term by its GO id and link it.
                ancestor = Term.objects(oid=number).get()
                term.update(push__ancestors=ancestor)
            except Exception as e:
                # Best-effort: a dangling is_a reference is logged, not fatal.
                logging.warning(e)
                logging.warning(number)
        count += 1
        if count % 100 == 0:
            print(".", end='', flush=True)
def show(id):
    """Serialize a paper's abstract as a list that alternates plain-text
    fragments with matched Term dicts (for front-end term highlighting).

    Args:
        id: Paper primary key. (Shadows the builtin ``id``; kept because
            the parameter name is part of the route's interface.)

    Returns:
        ``jsonify(response=[...])`` on success, where each list element is
        either a text fragment (str) or a serialized term dict carrying the
        exact matched text under key ``'here'``;
        ``jsonify(response=[], error=True)`` on any failure.
    """
    try:
        paper = Paper.objects(id=id).get()
        abstract = paper.abstract
        # Tokenize on whitespace after stripping punctuation, then add
        # plural forms, two-word phrases (plus their plurals), and
        # first-letter case-swapped variants so the Term name lookup is
        # tolerant of case and number.
        tokens = ''.join(c for c in abstract if c.isalnum() or c.isspace()).split()
        tokens.extend([p.plural(token) for token in tokens])
        two_word = [" ".join(tokens[i:i + 2]) for i in range(len(tokens) - 1)]
        two_word_plural = [p.plural(w) for w in two_word]
        tokens.extend(two_word)
        tokens.extend(two_word_plural)
        tokens.extend([t[0].upper() + t[1:] if t[0].islower() else t[0].lower() + t[1:]
                       for t in tokens])
        terms = [term.serialize() for term in Term.objects(name__in=tokens)]

        abstract = [abstract]
        for term in terms:
            name = term['name']
            # Fix: re.escape — term names may contain regex metacharacters
            # (e.g. parentheses), which previously broke re.compile or
            # leaked capture groups into the re.split result.
            singular_re = re.compile(re.escape(name), re.I)
            plural_re = re.compile(re.escape(p.plural(name)), re.I)
            # Fix: iterate with an explicit index so the length is
            # re-evaluated after each splice — the old range(len(...))
            # used a stale length and skipped tail fragments created by
            # earlier splices.
            i = 0
            while i < len(abstract):
                part = abstract[i]
                if isinstance(part, str):
                    # Try the singular form first, then the plural.
                    for pattern in (singular_re, plural_re):
                        m = pattern.search(part)
                        if m:
                            term_here = term.copy()
                            term_here['here'] = m.group()
                            pieces = pattern.split(part)
                            # Interleave term dicts between the split pieces:
                            # [piece, term, piece, term, ..., piece].
                            spliced = [term_here] * (len(pieces) * 2 - 1)
                            spliced[0::2] = pieces
                            abstract = abstract[:i] + spliced + abstract[i + 1:]
                            break
                i += 1
        return jsonify(response=abstract)
    except Exception as e:
        logging.warning(e)
        return jsonify(response=list(), error=True)
def recommend_from_user(user):
    """Score recommendation keywords for a user.

    Builds a bag-of-words over the abstracts of the user's subscribed
    papers: each uncommon word counts once per paper, ontology Term
    matches are weighted by the subscription's paper count, and keywords
    from the user's search history get a flat +100 boost.

    Args:
        user: user document with ``subscriptions`` and an ``id`` usable
            to query SearchHistory.

    Returns:
        Unsorted list of ``(keyword, score)`` tuples.
    """
    bag = defaultdict(int)
    for subscription in user.subscriptions:
        all_tokens = []
        papers = subscription.papers
        num_paper = len(papers)
        # Pass 1: count each distinct uncommon word once per paper.
        for paper in papers:
            try:
                abstract = paper.abstract
                words = set(x.strip().strip(",.").lower() for x in abstract.split())
                for word in words:
                    if word not in common_words:
                        bag[word] += 1
            except Exception as e:
                # Best-effort: a paper without a usable abstract is logged
                # and skipped, not fatal.
                logging.warning(e)
        # Pass 2: collect token variants for ontology-term matching.
        for paper in papers:
            abstract = paper.abstract
            tokens = ''.join(c for c in abstract if c.isalnum() or c.isspace()).split()
            tokens.extend([p.plural(token) for token in tokens])
            tokens.extend([t[0].upper() + t[1:] if t[0].islower() else t[0].lower() + t[1:]
                           for t in tokens])
            all_tokens.extend(tokens)
        all_tokens = set(all_tokens)
        # Fix: discard() — the old remove() inside try/except aborted
        # before removing 'Toes' whenever 'toes' was absent.
        all_tokens.discard('toes')
        all_tokens.discard('Toes')
        for term in Term.objects(name__in=all_tokens):
            bag[term.name] += num_paper
    # Boost anything the user has explicitly searched for.
    for history in SearchHistory.objects(user=user.id):
        bag[history.item.keyword] += 100
    return list(bag.items())