def load_clusters(): """Load info from topics.txt file into Cluster, TermCluster tables""" # Delete whatever's in the db already Cluster.query.delete() TermCluster.query.delete() count_clusters = 0 for row in open("topics.csv"): row = row.rstrip().split(",") # Parse the txt into the appropriate data types for seeding cluster = int(row[1][-3:]) word = row[3].strip() # Check if word is in our list of key terms. If it is, add to # TermCluster table to allow for lookup later (see seed.py for TODO) if Term.check_for_term(word) is True: term_cluster_to_add = TermCluster(word=word, cluster_id=cluster) db.session.add(term_cluster_to_add) db.session.commit() # Check if a cluster is in our list of clusters. If it's not, add it. if Cluster.check_for_cluster(cluster) is False: cluster_to_add = Cluster(cluster_id=cluster) db.session.add(cluster_to_add) db.session.commit() # Print where we are and increment counter print "Topics.txt seeding row", count_clusters count_clusters += 1
def load_studies_terms(): """Loads info from studies_terms.txt into StudyTerm & Term tables. File format: R ID \t pmid \t word \t frequency Source: Neurosynth features.txt, transformed in R to long format.""" print "Studies_terms.txt seeding" # Delete all rows in existing tables, so if we need to run this a second time, # we won't be trying to add duplicate users StudyTerm.query.delete() Term.query.delete() skip = True count_studies_terms = 0 studies_terms = open("seed_data/studies_terms.txt") for row in studies_terms: # Skip the first line of the file if skip: skip = False continue # Stop after 5000 lines # if count_studies_terms > 5000: # break # Parse txt file and convert to appropriate data types for seeding row = row.rstrip().split('\t') # If the term starts with "X", it is not a word but a number, e.g. "X01" # These don't make sense to track, so skip these rows. if row[2].startswith('\"X'): continue # Skip the lines indicating that a term did not appear anywhere # in the article (frequency of 0) if float(row[3]) == 0.0: continue pmid = int(row[1]) word = row[2].strip('\"').replace(".", " ") freq = float(row[3]) # Check if the word is already in Term; if not, add it if Term.check_for_term(word) is False: word_to_add = Term(word=word) db.session.add(word_to_add) # Add the row to the studies_terms table studies_terms_to_add = StudyTerm(word=word, pmid=pmid, frequency=freq) db.session.add(studies_terms_to_add) db.session.commit() # Print where we are and increment counter print "studies_terms.txt seeding row ", count_studies_terms count_studies_terms += 1
def create_term(id):
    """Create a Term in collection *id* from the posted form data.

    On a validation failure a flash message is queued instead of saving;
    in both cases the user is redirected back to the collection view.
    """
    form = forms.NewTermForm(request.form)

    if form.validate():
        new_term = Term(term=form.term.data,
                        definition=form.definition.data,
                        user_id=current_user.id,
                        collection_id=id)
        model.session.add(new_term)
        model.session.commit()
        # Refresh so DB-generated fields are populated on the instance.
        model.session.refresh(new_term)
    else:
        flash("Error, all fields are required")

    return redirect(url_for("view_collection", id=id))
def addNodes(nodes):
    """Upsert Node, NodeRevision and TermNode rows for each id in *nodes*.

    For every node id the raw node/revision/tag dicts are fetched via
    getNode(), their Drupal-style keys (nid/vid/uid/tid) are renamed to the
    ORM column names, and the rows are inserted-or-updated through SQLObject.
    Returns the list of ids that were processed.
    """
    added = []
    for id in nodes:
        node, revision, tags = getNode(id)
        # Rename Drupal column keys to the ORM attribute names before the
        # **kwargs expansion below; the originals are deleted so they are
        # not passed as unexpected keyword arguments.
        node['version'] = node['vid']
        node['user'] = node['uid']
        node['id'] = node['nid']
        revision['user'] = revision['uid']
        revision['id'] = revision['vid']
        revision['node'] = revision['nid']
        del(node['status'])
        del(node['uid'])
        del(node['nid'])
        del(node['vid'])
        del(revision['uid'])
        del(revision['nid'])
        del(revision['vid'])
        # add the nodes
        # EAFP upsert: update the existing row if it exists, otherwise create.
        try:
            dbnode = Node.get(node['id'])
            # 'id' must not be passed to set(); it is the primary key.
            del(node['id'])
            dbnode.set(**node)
        except SQLObjectNotFound:
            dbnode = Node(**node)
        try:
            dbnoder = NodeRevision.get(revision['id'])
            del(revision['id'])
            dbnoder.set(**revision)
        except SQLObjectNotFound:
            dbnoder = NodeRevision(**revision)
        # add the termnodes
        for tag in tags:
            # Same key-renaming dance for the node<->term link rows.
            tag['node'] = tag['nid']
            tag['term'] = tag['tid']
            del(tag['nid'])
            del(tag['tid'])
            try:
                dbtag = Term.get(tag['term'])
            except SQLObjectNotFound:
                # Unknown term: create it first via addTags().
                dbtag = addTags([tag['term']])[0]
            # Only create the link row if this (node, term) pair is new.
            terms = list(TermNode.selectBy(node=dbnode,term=dbtag))
            if not terms:
                term = TermNode(**tag)
            else:
                term = terms[0]
        print "Node %d made!" % id
        added.append(id)
    return added
def load_clusters(): """Load info from topics.txt file into Cluster, TermCluster tables File format: R row id,Topic XXX,R column ID,word where XXX represents a number between 0-400 R ids can be discarded during seeding Source: topic clustering data from Neurosynth, converted to long format in R prior to seeding. Notes: the words tracked in this clustering are not in perfect alignment with those tracked in studies_terms.txt. Approximately 2000 of the terms in studies_terms have a topical cluster, the remaining ~1000 do not. This number could be improved by stemming. Many of the words not tracked in clusters are multi-word phrases.""" # Delete whatever's in the db already Cluster.query.delete() TermCluster.query.delete() count_clusters = 0 topics_fileobj = open('seed_data/topics.csv') for row in topics_fileobj: row = row.rstrip().split(',') # Parse the txt into the appropriate data types for seeding cluster = int(row[1][-3:]) word = row[3].strip() # Check if word is in our list of key terms. If it is, add to # TermCluster table to allow for lookup later (see model.py for TODO) if Term.check_for_term(word) is True: term_cluster_to_add = TermCluster(word=word, cluster_id=cluster) db.session.add(term_cluster_to_add) db.session.commit() # Check if a cluster is in our list of clusters. If it's not, add it. if Cluster.check_for_cluster(cluster) is False: cluster_to_add = Cluster(cluster_id=cluster) db.session.add(cluster_to_add) db.session.commit() # Print where we are and increment counter print "Topics.txt seeding row", count_clusters count_clusters += 1 topics_fileobj.close()
def syncNodes(): print "syncing nodes" tags_checked = [] users_checked = [] nodes_checked = [] tags_new = [] users_new = [] nodes_new = [] for node in getNewNodes(): tid = node['tid'] nid = node['nid'] changed = node['changed'] uid = node['uid'] if tid not in tags_checked: try: term = Term.get(tid) except SQLObjectNotFound: tags_new.append(tid) tags_checked.append(tid) if uid not in users_checked: try: node = User.get(uid) except SQLObjectNotFound: users_new.append(uid) users_checked.append(uid) if nid not in nodes_checked: try: node = Node.get(nid) if node.changed < changed: nodes_new.append(nid) except SQLObjectNotFound: nodes_new.append(nid) nodes_checked.append(nid) hub.begin() # add new db information addTags(tags_new) addUsers(users_new) nodes = addNodes(nodes_new) setWeight() hub.commit() return nodes
def display_term_graph():
    """Display the poetic terms information page.

    Picks a random contiguous window of SHOW_NUMBER term ids to graph and
    hands the template a {term: term_id} map of every term in the db.
    """
    last_term_id = 49

    # Random window [start_at, stop_before) within the known id range.
    start_at = randint(1, last_term_id - SHOW_NUMBER)
    stop_before = start_at + SHOW_NUMBER
    term_data = Term.get_term_data(start_at=start_at, stop_before=stop_before)

    # Map each term string to its id for lookups in the template.
    all_terms = {term: term_id
                 for term, term_id in db.session.query(Term.term, Term.term_id).all()}

    return render_template("terms.html",
                           term_data=term_data,
                           all_terms=all_terms)
def add_term():
    """Add term to the db and return a JSON confirmation payload."""
    # Pull and coerce the posted fields.
    term = str(request.form.get('term'))
    user_id = int(request.form.get('user_id'))
    parent_id = int(request.form.get('parent_id'))

    # Persist the new term row.
    db.session.add(Term(term=term, user_id=user_id, parent_id=parent_id))
    db.session.commit()

    return jsonify({'message' : "'%s' has been added as a term!" % term,
                    "term_name" : term,
                    "parent_id" : parent_id})
def add_category():
    """Add category to the db and return its name, owner and new id as JSON."""
    category = str(request.form.get('category'))
    user_id = int(request.form.get('user_id'))

    # Add the new category to the db:
    new_category = Term(term=category, user_id=user_id)
    db.session.add(new_category)
    db.session.commit()

    # SQLAlchemy populates the primary key on the instance at commit, so the
    # old re-query (Term.query.filter(Term.term == category).first()) was a
    # redundant round-trip — and could return the WRONG row if another term
    # with the same name already existed.
    category_id = new_category.id

    results_dict = {"category_name" : category,
                    "user_id" : user_id,
                    "category_id" : category_id}
    return jsonify(results_dict)
def iter_options():
    """Yield (term, value) pairs for each parseable <option> under *select*."""
    for option in select.findAll('option'):
        parsed = Term.parse(option.text)
        # Skip options whose text doesn't parse to a (truthy) term.
        if not parsed:
            continue
        yield (parsed, dict(option.attrs)['value'])
def iter_options():
    """Generate (term, value) tuples from the select element's options."""
    options = select.findAll("option")
    for opt in options:
        term = Term.parse(opt.text)
        if term:
            # Materialise the attrs to grab the option's submitted value.
            value = dict(opt.attrs)["value"]
            yield (term, value)
def setWeight():
    """Recompute the weight for every Term in the database."""
    for term in Term.select():
        # NOTE(review): True presumably forces a fresh recalculation rather
        # than a cached read — confirm against Term._get_weight.
        term._get_weight(True)
def term_type(value):
    """argparse type-converter: parse *value* into a Term.

    Raises argparse.ArgumentTypeError when the string is not in the
    expected format, which argparse reports as a usage error.
    """
    parsed = Term.parse(value)
    if parsed is not None:
        return parsed
    raise argparse.ArgumentTypeError("invalid format")
def retrieve_words():
    """Retrieves all available words in the db for autocomplete functionality."""
    return jsonify({"words": Term.get_all()})