"word": word.lemma if is_lemmatized else word.word, "is_lemmatized": 1 if is_lemmatized else 0, "count": word.sentence_count, } if "query_id" in params: result["id"] = "." + str(word.id) if is_lemmatized else str(word.id) result["document_count"] = word.document_count else: result["id"] = str(word.word_id) results.append(result) return results def post(self): pass def delete(self, id): pass def put(self, id): pass register_rest_view( WordsView, wordseer, 'words_view', 'word', parents=["project"], )
# NOTE(review): whitespace-collapsed fragment that starts mid-method; the
# enclosing def is not visible and the nesting below is reconstructed from
# syntax only -- confirm against the original file.
        # Copy every document property into `info`, keyed by property name.
        # (`property` shadows the builtin of the same name inside this loop.)
        for property in document.properties:
            info[property.name] = property.value
        info["metadata"] = self.make_unit_metadata(document)
        # Called for its effect on `info`; the return value is not used.
        self.add_unit_info(document, info)
        return jsonify(info)

    # No-op handlers (bodies are just `pass`); presumably REST verb methods of
    # a view class registered below -- confirm.
    def post(self):
        pass

    def delete(self, id):
        pass

    def put(self, id):
        pass


# Registration calls; the meaning of the arguments is defined by
# register_rest_view, which is not visible here.
register_rest_view(
    DocumentsView,
    wordseer,
    'documents_view',
    'document',
    parents=["project"],
)

register_rest_view(
    SingleDocumentView,
    wordseer,
    'single_document_view',
    'document_content',
    parents=["project"],
)
# NOTE(review): whitespace-collapsed fragment that starts mid-method; the
# enclosing def (and any loop around the first statement) is not visible, so
# the nesting below is reconstructed from syntax only -- confirm.
            self.add_relation_to_response(response, "dep", relation)
        # Count WordInSentence rows for this project whose word_id is in word_ids.
        response["search"] = WordInSentence.query.\
            filter(WordInSentence.project_id == project.id).\
            filter(WordInSentence.word_id.in_(word_ids)).count()
        return jsonify(response)


# Each entry is a space-separated string of relation names. The leading entries
# list several names together; the later entries hold a single name each. Some
# names (e.g. "amod", "acomp") appear both inside a multi-name entry and again
# on their own. How the list is consumed is not visible in this fragment.
GRAMMATICAL_RELATION_GROUPS = [
    "amod advmod acomp",
    "agent subj nsubj xsubj csubj nsubjpass csubjpass",
    "dobj iobj pobj",
    "prep_because prep_because_of prep_on_account_of prep_owing_to prepc_because prepc_because_of prepc_on_account_of prepc_owing_to",
    "conj_and",
    "prep_with prepc_with prep_by_means_of prepc_by_means_of",
    "prep pobj",
    "prep_to", "prep_from", "prep_of", "prep_on", "prep_by", "prep_in",
    "abbrev", "acomp", "advcl", "advmod", "agent", "amod", "appos", "attr",
    "aux", "auxpass", "cc", "ccomp", "complm", "conj", "cop", "csubj",
    "csubjpass", "dep", "det", "dobj", "expl", "infmod", "iobj", "mark",
    "mwe", "neg", "nn", "npadvmod", "nsubj", "nsubjpass", "num", "number",
    "parataxis", "partmod", "pcomp", "pobj", "poss", "preconj", "predet",
    "prep", "prepc", "prt", "punct", "purpcl", "quantmod", "rcmod", "ref",
    "rel", "root", "tmod", "xcomp", "xsubj"
]

# Registration call; the meaning of the arguments is defined by
# register_rest_view, which is not visible here.
register_rest_view(
    GrammaticalSearchOptionsView,
    wordseer,
    'wordmenu_view',
    'grammatical_search_option',
    parents=["project"],
)
"num", "number", "parataxis", "partmod", "pcomp", "pobj", "poss", "preconj", "predet", "prep", "prepc", "prt", "punct", "purpcl", "quantmod", "rcmod", "ref", "rel", "root", "tmod", "xcomp", "xsubj"] register_rest_view( GrammaticalSearchOptionsView, wordseer, 'wordmenu_view', 'grammatical_search_option', parents=["project"], )
"""Adds the properties of each sentence to the dictionary being sent to the client.""" for property in sentence.properties: if property.property_metadata.is_category: result[property.name] = property.value def make_sentence_dict(self, sentence, matching_words): sentence_dict = {} sentence_dict["words"] = self.make_sentence_html( sentence, matching_words) sentence_dict["text"] = sentence.text return sentence_dict def post(self): pass def delete(self, id): pass def put(self, id): pass register_rest_view( SentencesView, wordseer, 'sentences_view', 'sentence', parents=["project", "document", "unit"], )
# NOTE(review): whitespace-collapsed fragment that starts mid-method. The `if`
# that pairs with the `else` below and any enclosing loops are not visible, so
# the nesting is reconstructed from syntax only -- confirm.
                    values.append(value)
                # Sentence ids whose property `property_name` has a value in
                # `values`, as a subquery exposing a `sentence_id` column.
                matching_sentences = db.session.query(
                    PropertyOfSentence.sentence_id.label("sentence_id")).\
                    join(Property,
                         PropertyOfSentence.property_id == Property.id).\
                    filter(Property.name == property_name).\
                    filter(Property.value.in_(values)).subquery()
            else:
                # Here each `values` is indexed as [low, high] and applied as an
                # inclusive range on Property.value.
                # NOTE(review): matching_sentences is reassigned on every
                # iteration; if the join below really sits outside this loop,
                # only the last range takes effect -- confirm the nesting.
                for values in value_list:
                    matching_sentences = db.session.query(
                        PropertyOfSentence.sentence_id.label("sentence_id")).\
                        join(Property,
                             PropertyOfSentence.property_id == Property.id).\
                        filter(Property.name == property_name).\
                        filter(Property.value >= values[0]).\
                        filter(Property.value <= values[1]).\
                        subquery()
            # Restrict the running query to sentences present in the subquery.
            filtered_sentences = filtered_sentences.join(
                matching_sentences,
                Sentence.id == matching_sentences.c.sentence_id)
        return filtered_sentences

    # No-op handler (body is just `pass`).
    def put(self, id):
        pass


# Registration call; the meaning of the arguments (including pk=) is defined
# by register_rest_view, which is not visible here.
register_rest_view(QueryCacheView,
                   wordseer,
                   'cache_view',
                   'cache',
                   pk="query_id",
                   parents=["project"])
# NOTE(review): whitespace-collapsed fragment that starts mid-method; the
# enclosing def/loop and the definitions of `alldocs`, `tf`, `row` and `word`
# are not visible. Nesting is reconstructed from syntax only -- confirm.
            # df: first column of the first WordCount row for this word/project.
            # Indexing with [0][0] raises if the query returns no rows.
            df = db.session.query(WordCount.document_count).\
                filter(WordCount.word_id == word.id).\
                filter(WordCount.project_id == params["project_id"])[0][0]
            # NOTE(review): if alldocs and df are both ints and this runs on
            # Python 2, "/" truncates before the log is taken -- confirm.
            idf = alldocs / df
            row["score_sentences"] = tf * math.log(idf)
            response["Words"].append(row)
        # sort by tf*idf
        response["Words"] = sorted(response["Words"],
                                   key=lambda k: k['score_sentences'],
                                   reverse=True)
        return jsonify(response)

    # No-op handlers (bodies are just `pass`); presumably REST verb methods of
    # the view class registered below -- confirm.
    def post(self):
        pass

    def delete(self, id):
        pass

    def put(self, id):
        pass


# Registration call; the meaning of the arguments is defined by
# register_rest_view, which is not visible here.
register_rest_view(
    AssociatedWordsView,
    wordseer,
    'associated_words_view',
    'associated_word',
    parents=["project"],
)
literal_column("'phrase'").label("class"), func.count(SequenceInSentence.sentence_id).\ label("sentence_count") ).\ filter(Sequence.project == self.project).\ filter(SequenceInSentence.sequence_id == Sequence.id).\ filter(Sequence.lemmatized.in_(lemmatized_vals)).\ filter(self.sequence_filter).\ group_by(Sequence.sequence).\ order_by(desc("sentence_count")).\ order_by(asc(Sequence.length)).\ limit(50) sequence_list = {} for sequence in sequences: text = sequence.text.lower() # print text if text not in sequence_list and text is not None: sequence_list[text] = 1 suggested_sequences.append(sequence._asdict()) return suggested_sequences register_rest_view( AutoSuggest, wordseer, 'searchsuggestions', 'searchsuggestion', parents=["project"] )
# NOTE(review): whitespace-collapsed fragment that starts mid-method; the
# enclosing def and the definition of `counts` are not visible. Nesting is
# reconstructed from syntax only -- confirm.
        # Convert each row of `counts` to a dict via _asdict() and return them
        # under the "results" key.
        results = []
        for sequence in counts:
            results.append(sequence._asdict())
        return jsonify(results=results)

    # No-op handlers (bodies are just `pass`); presumably REST verb methods of
    # a view class registered below -- confirm.
    def post(self):
        pass

    def delete(self, id):
        pass

    def put(self, id):
        pass


# Registration calls; the meaning of the arguments is defined by
# register_rest_view, which is not visible here.
register_rest_view(
    SequencesView,
    wordseer,
    'sequences_view',
    'sequence',
    parents=["project"],
)

register_rest_view(
    ContainingSequencesView,
    wordseer,
    'containing_sequences_view',
    'containing_sequence',
    parents=["project"],
)
elif view_type == "tree": metadata["children"].append(property_value) if view_type == "tree": results.append(metadata) return jsonify(children = results) def post(self): pass def delete(self, id): pass def put(self, id): pass register_rest_view( PropertiesMetaView, wordseer, 'properties_meta_view', 'meta_property', parents=["project", "document", "sentence"], ) register_rest_view( PropertiesView, wordseer, 'properties_view', 'property', parents=["project", "document", "sentence"], )
# need to aggregate any duplicate props # that are associated with different unit_ids for prop in Property.query.filter( Property.name == property.name, Property.value == property.value, Property.project_id == project_id ): total += len(prop.sentences_with_property) values = [total, property.value] values.extend([0] * num_queries) results[property.name][property.value] = values results[property.name][property.value][2 + query_index] += 1 def post(self): pass def delete(self, id): pass def put(self, id): pass register_rest_view( MetadataFrequenciesView, wordseer, 'metdata_frequencies_view', 'metadata_frequency', parents=["project"] )
"is_lemmatized": 1 if is_lemmatized else 0, "count": word.sentence_count, } if "query_id" in params: result["id"] = "." + str(word.id) if is_lemmatized else str( word.id) result["document_count"] = word.document_count else: result["id"] = str(word.word_id) results.append(result) return results def post(self): pass def delete(self, id): pass def put(self, id): pass register_rest_view( WordsView, wordseer, 'words_view', 'word', parents=["project"], )
# NOTE(review): whitespace-collapsed fragment that starts mid-method; the
# enclosing def and the definition of `counts` are not visible. Nesting is
# reconstructed from syntax only -- confirm.
        # Convert each row of `counts` to a dict via _asdict() and return them
        # under the "results" key.
        results = []
        for sequence in counts:
            # The trailing semicolon is a harmless empty-statement separator.
            results.append(sequence._asdict());
        return jsonify(results = results)

    # No-op handlers (bodies are just `pass`); presumably REST verb methods of
    # a view class registered below -- confirm.
    def post(self):
        pass

    def delete(self, id):
        pass

    def put(self, id):
        pass


# Registration calls; the meaning of the arguments is defined by
# register_rest_view, which is not visible here.
register_rest_view(
    SequencesView,
    wordseer,
    'sequences_view',
    'sequence',
    parents=["project"],
)

register_rest_view(
    ContainingSequencesView,
    wordseer,
    'containing_sequences_view',
    'containing_sequence',
    parents=["project"],
)
# NOTE(review): whitespace-collapsed fragment that starts inside a query(...)
# call. The assignment target, the `if` that pairs with the `else` below and
# any enclosing loops are not visible; nesting is reconstructed -- confirm.
                    PropertyOfSentence.sentence_id.label("sentence_id")).\
                    join(Property,
                         PropertyOfSentence.property_id == Property.id).\
                    filter(Property.name == property_name).\
                    filter(Property.value.in_(values)).subquery()
            else:
                # Here each `values` is indexed as [low, high] and applied as an
                # inclusive range on Property.value.
                # NOTE(review): matching_sentences is reassigned on every
                # iteration; if the join below really sits outside this loop,
                # only the last range takes effect -- confirm the nesting.
                for values in value_list:
                    matching_sentences = db.session.query(
                        PropertyOfSentence.sentence_id.label("sentence_id")).\
                        join(Property,
                             PropertyOfSentence.property_id == Property.id).\
                        filter(Property.name == property_name).\
                        filter(Property.value >= values[0]).\
                        filter(Property.value <= values[1]).\
                        subquery()
            # Restrict the running query to sentences present in the subquery.
            filtered_sentences = filtered_sentences.join(
                matching_sentences,
                Sentence.id == matching_sentences.c.sentence_id)
        return filtered_sentences

    # No-op handler (body is just `pass`).
    def put(self, id):
        pass


# Registration call; the meaning of the arguments (including pk=) is defined
# by register_rest_view, which is not visible here.
register_rest_view(
    QueryCacheView,
    wordseer,
    'cache_view',
    'cache',
    pk="query_id",
    parents=["project"]
)
# NOTE(review): whitespace-collapsed fragment that starts mid-method; the
# enclosing def and the definitions of self.operations / self.operation are
# not visible. Nesting is reconstructed from syntax only -- confirm.
        # Pull request arguments onto the instance. type=int is handed to
        # request.args.get for conversion (presumably Werkzeug MultiDict
        # semantics -- confirm).
        self.annotation_id = request.args.get("annotation", type=int)
        self.itemtype = request.args.get("itemType")
        self.note_id = request.args.get("noteID", type=int)
        self.text = request.args.get("text")
        self.new_name = request.args.get("newName")
        self.new_parent = request.args.get("newParent")
        self.merge_into = request.args.get("mergeInto")
        # Dispatch: look up the handler stored under self.operation and call it
        # with this instance as its only argument.
        result = self.operations[self.operation](self)
        # A plain dict is sent as the payload itself; anything else is wrapped
        # under the "results" key.
        # NOTE(review): type(...) == dict does not match dict subclasses;
        # isinstance is the usual check -- confirm before changing.
        if type(result) == dict:
            return jsonify(result)
        else:
            return jsonify(results = result)

    # No-op handlers (bodies are just `pass`); presumably REST verb methods of
    # the view class registered below -- confirm.
    def post(self):
        pass

    def delete(self, id):
        pass

    def put(self, id):
        pass


# Registration call; the meaning of the arguments is defined by
# register_rest_view, which is not visible here.
register_rest_view(
    SetsView,
    wordseer,
    'sets_view',
    'set',
    parents=["project"],
)
for center_string in wordtree_center_strings] for sentence in query.sentences: left = {"id": sentence.id, "sentence": []} right = {"id": sentence.id, "sentence": []} tokens = map(lambda x: x.lower(), word_tokenize(sentence.text)) matched = False for center_string_token_list in center_string_token_lists: if not matched: for start_index in range(len(tokens) - len(center_string_token_list)): candidate = tokens[start_index : start_index + len(center_string_token_list)] if candidate == center_string_token_list: lefts = tokens[:start_index] lefts.reverse() left["sentence"] = lefts right["sentence"] = tokens[start_index + len(center_string_token_list) : ] matched = True break tree_data["concordance"]["lefts"].append(left) tree_data["concordance"]["rights"].append(right) return jsonify(tree_data) register_rest_view( WordTreeView, wordseer, 'wordtree', 'wordtree', parents=["project"], )
# NOTE(review): whitespace-collapsed fragment that starts inside a query(...)
# argument list; the enclosing def and the start of the query are not visible.
# Nesting is reconstructed from syntax only -- confirm.
                Sequence.lemmatized,
                literal_column("'phrase'").label("class"),
                func.count(SequenceInSentence.sentence_id).\
                    label("sentence_count")
            ).\
            filter(Sequence.project == self.project).\
            filter(SequenceInSentence.sequence_id == Sequence.id).\
            filter(Sequence.lemmatized.in_(lemmatized_vals)).\
            filter(self.sequence_filter).\
            group_by(Sequence.sequence).\
            order_by(desc("sentence_count")).\
            order_by(asc(Sequence.length)).\
            limit(50)
        # sequence_list acts as a "seen" set keyed by lower-cased text.
        sequence_list = {}
        for sequence in sequences:
            text = sequence.text.lower()
            # print text
            # NOTE(review): `text is not None` can never be False here, because
            # .lower() above would already have raised on a None text.
            if text not in sequence_list and text is not None:
                sequence_list[text] = 1
                suggested_sequences.append(sequence._asdict())
        return suggested_sequences


# Registration call; the meaning of the arguments is defined by
# register_rest_view, which is not visible here.
register_rest_view(AutoSuggest,
                   wordseer,
                   'searchsuggestions',
                   'searchsuggestion',
                   parents=["project"])
total = 0 # need to aggregate any duplicate props # that are associated with different unit_ids for prop in Property.query.filter( Property.name == property.name, Property.value == property.value, Property.project_id == project_id): total += len(prop.sentences_with_property) values = [total, property.value] values.extend([0] * num_queries) results[property.name][property.value] = values results[property.name][property.value][2 + query_index] += 1 def post(self): pass def delete(self, id): pass def put(self, id): pass register_rest_view(MetadataFrequenciesView, wordseer, 'metdata_frequencies_view', 'metadata_frequency', parents=["project"])
metadata["children"].append(property_value) if view_type == "tree": results.append(metadata) return jsonify(children=results) def post(self): pass def delete(self, id): pass def put(self, id): pass register_rest_view( PropertiesMetaView, wordseer, 'properties_meta_view', 'meta_property', parents=["project", "document", "sentence"], ) register_rest_view( PropertiesView, wordseer, 'properties_view', 'property', parents=["project", "document", "sentence"], )
right = {"id": sentence.id, "sentence": []} tokens = map(lambda x: x.lower(), word_tokenize(sentence.text)) matched = False for center_string_token_list in center_string_token_lists: if not matched: for start_index in range( len(tokens) - len(center_string_token_list)): candidate = tokens[start_index:start_index + len(center_string_token_list)] if candidate == center_string_token_list: lefts = tokens[:start_index] lefts.reverse() left["sentence"] = lefts right["sentence"] = tokens[ start_index + len(center_string_token_list):] matched = True break tree_data["concordance"]["lefts"].append(left) tree_data["concordance"]["rights"].append(right) return jsonify(tree_data) register_rest_view( WordTreeView, wordseer, 'wordtree', 'wordtree', parents=["project"], )
# NOTE(review): whitespace-collapsed fragment that starts with the closing
# brackets of an expression whose opening is not visible; the enclosing def
# and loop are also cut off. Nesting is reconstructed -- confirm.
                    ]
                )
                html.append(word_html)
        html.append("</span>")
        # The accumulated pieces are concatenated into a single string.
        return "".join(html)

    def add_metadata_properties(self, sentence, result):
        """Adds the properties of each sentence to the dictionary being sent to
        the client.

        Only properties whose property_metadata.is_category is truthy are
        copied, as result[property.name] = property.value. `result` is
        modified in place; nothing is returned.
        """
        # (`property` shadows the builtin of the same name inside this loop.)
        for property in sentence.properties:
            if property.property_metadata.is_category:
                result[property.name] = property.value

    def make_sentence_dict(self, sentence, matching_words):
        """Return a dict describing `sentence`.

        "words" holds the result of self.make_sentence_html(sentence,
        matching_words); "text" holds sentence.text.
        """
        sentence_dict = {}
        sentence_dict["words"] = self.make_sentence_html(sentence,
                                                         matching_words)
        sentence_dict["text"] = sentence.text
        return sentence_dict

    # No-op handlers (bodies are just `pass`); presumably REST verb methods of
    # the view class registered below -- confirm.
    def post(self):
        pass

    def delete(self, id):
        pass

    def put(self, id):
        pass


# Registration call; the meaning of the arguments is defined by
# register_rest_view, which is not visible here.
register_rest_view(SentencesView,
                   wordseer,
                   "sentences_view",
                   "sentence",
                   parents=["project", "document", "unit"])
# NOTE(review): whitespace-collapsed fragment that starts in the middle of a
# backslash-continued query chain; the assignment target, the enclosing
# def/loop and the definitions of `alldocs`, `tf` and `row` are not visible.
# Nesting is reconstructed from syntax only -- confirm.
                filter(WordCount.word_id == word.id).\
                filter(WordCount.project_id == params["project_id"])[0][0]
            # NOTE(review): if alldocs and df are both ints and this runs on
            # Python 2, "/" truncates before the log is taken -- confirm.
            idf = alldocs / df
            row["score_sentences"] = tf * math.log(idf)
            response["Words"].append(row)
        # sort by tf*idf
        response["Words"] = sorted(response["Words"],
                                   key=lambda k: k['score_sentences'],
                                   reverse=True)
        return jsonify(response)

    # No-op handlers (bodies are just `pass`); presumably REST verb methods of
    # the view class registered below -- confirm.
    def post(self):
        pass

    def delete(self, id):
        pass

    def put(self, id):
        pass


# Registration call; the meaning of the arguments is defined by
# register_rest_view, which is not visible here.
register_rest_view(
    AssociatedWordsView,
    wordseer,
    'associated_words_view',
    'associated_word',
    parents=["project"],
)
# NOTE(review): whitespace-collapsed fragment that starts inside a loop body
# whose header is not visible; the enclosing def is also cut off. Nesting is
# reconstructed from syntax only -- confirm against the original file.
            info[property.name] = property.value
        info["metadata"] = self.make_unit_metadata(document)
        # Called for its effect on `info`; the return value is not used.
        self.add_unit_info(document, info)
        return jsonify(info)

    # No-op handlers (bodies are just `pass`); presumably REST verb methods of
    # a view class registered below -- confirm.
    def post(self):
        pass

    def delete(self, id):
        pass

    def put(self, id):
        pass


# Registration calls; the meaning of the arguments is defined by
# register_rest_view, which is not visible here.
register_rest_view(
    DocumentsView,
    wordseer,
    'documents_view',
    'document',
    parents=["project"],
)

register_rest_view(
    SingleDocumentView,
    wordseer,
    'single_document_view',
    'document_content',
    parents=["project"],
)