Beispiel #1
0
                "word": word.lemma if is_lemmatized else word.word,
                "is_lemmatized": 1 if is_lemmatized else 0,
                "count": word.sentence_count,
            }
            if "query_id" in params: 
                result["id"] = "." + str(word.id) if is_lemmatized else str(word.id)
                result["document_count"] = word.document_count
            else: 
                result["id"] = str(word.word_id)

            results.append(result)
        return results

    def post(self):
        # Deliberate no-op: POST is not implemented for this view.
        pass

    def delete(self, id):
        # Deliberate no-op: DELETE is not implemented for this view.
        pass

    def put(self, id):
        # Deliberate no-op: PUT is not implemented for this view.
        pass


# Route registration: expose WordsView as endpoint 'words_view' for the
# 'word' resource, with "project" as its parent resource.
register_rest_view(
    WordsView,
    wordseer,
    'words_view',
    'word',
    parents=["project"],
)
Beispiel #2
0
        for property in document.properties:
            info[property.name] = property.value
        info["metadata"] = self.make_unit_metadata(document)
        self.add_unit_info(document, info)
        
        return jsonify(info)

    def post(self):
        # Deliberate no-op: POST is not implemented for this view.
        pass

    def delete(self, id):
        # Deliberate no-op: DELETE is not implemented for this view.
        pass

    def put(self, id):
        # Deliberate no-op: PUT is not implemented for this view.
        pass

# Route registration: DocumentsView serves the 'document' resource under a
# project.
register_rest_view(
    DocumentsView,
    wordseer,
    'documents_view',
    'document',
    parents=["project"],
)

# Route registration: SingleDocumentView serves 'document_content' under a
# project.
register_rest_view(
    SingleDocumentView,
    wordseer,
    'single_document_view',
    'document_content',
    parents=["project"],
)
Beispiel #3
0
            self.add_relation_to_response(response, "dep", relation)

        response["search"] = WordInSentence.query.\
            filter(WordInSentence.project_id == project.id).\
            filter(WordInSentence.word_id.in_(word_ids)).count()

        return jsonify(response)


# Grammatical-relation labels offered as search options. Each entry is a
# string of one or more space-separated dependency-relation labels (e.g.
# Stanford-style "nsubj", "dobj", "prep_*"); multi-label entries group
# related relations into a single option, and some labels appear both inside
# a group and as a standalone entry. NOTE(review): how consumers split and
# interpret these strings is not visible in this file — confirm before
# editing the contents.
GRAMMATICAL_RELATION_GROUPS = [
    "amod advmod acomp", "agent subj nsubj xsubj csubj nsubjpass csubjpass",
    "dobj iobj pobj",
    "prep_because prep_because_of prep_on_account_of prep_owing_to prepc_because prepc_because_of prepc_on_account_of prepc_owing_to",
    "conj_and", "prep_with prepc_with prep_by_means_of prepc_by_means_of",
    "prep pobj", "prep_to", "prep_from", "prep_of", "prep_on", "prep_by",
    "prep_in", "abbrev", "acomp", "advcl", "advmod", "agent", "amod", "appos",
    "attr", "aux", "auxpass", "cc", "ccomp", "complm", "conj", "cop", "csubj",
    "csubjpass", "dep", "det", "dobj", "expl", "infmod", "iobj", "mark", "mwe",
    "neg", "nn", "npadvmod", "nsubj", "nsubjpass", "num", "number",
    "parataxis", "partmod", "pcomp", "pobj", "poss", "preconj", "predet",
    "prep", "prepc", "prt", "punct", "purpcl", "quantmod", "rcmod", "ref",
    "rel", "root", "tmod", "xcomp", "xsubj"
]

# Route registration: GrammaticalSearchOptionsView serves the
# 'grammatical_search_option' resource under a project.
register_rest_view(
    GrammaticalSearchOptionsView,
    wordseer,
    'wordmenu_view',
    'grammatical_search_option',
    parents=["project"],
)
Beispiel #4
0
    "num",
    "number",
    "parataxis",
    "partmod",
    "pcomp",
    "pobj",
    "poss",
    "preconj",
    "predet",
    "prep",
    "prepc",
    "prt",
    "punct",
    "purpcl",
    "quantmod",
    "rcmod",
    "ref",
    "rel",
    "root",
    "tmod",
    "xcomp",
    "xsubj"]


# Route registration: GrammaticalSearchOptionsView serves the
# 'grammatical_search_option' resource under a project.
register_rest_view(
    GrammaticalSearchOptionsView,
    wordseer,
    'wordmenu_view',
    'grammatical_search_option',
    parents=["project"],
)
Beispiel #5
0
        """Adds the properties of each sentence to the dictionary being sent to
        the client."""
        for property in sentence.properties:
            if property.property_metadata.is_category:
                result[property.name] = property.value

    def make_sentence_dict(self, sentence, matching_words):
        """Package one sentence for the client.

        Returns a dict with the rendered sentence HTML under "words" and
        the raw sentence text under "text".
        """
        rendered = self.make_sentence_html(sentence, matching_words)
        return {"words": rendered, "text": sentence.text}

    def post(self):
        # Deliberate no-op: POST is not implemented for this view.
        pass

    def delete(self, id):
        # Deliberate no-op: DELETE is not implemented for this view.
        pass

    def put(self, id):
        # Deliberate no-op: PUT is not implemented for this view.
        pass


# Route registration: SentencesView serves the 'sentence' resource, reachable
# under project, document, and unit parents.
register_rest_view(
    SentencesView,
    wordseer,
    'sentences_view',
    'sentence',
    parents=["project", "document", "unit"],
)
Beispiel #6
0
                    values.append(value)
                matching_sentences = db.session.query(
                    PropertyOfSentence.sentence_id.label("sentence_id")).\
                join(Property, PropertyOfSentence.property_id == Property.id).\
                filter(Property.name == property_name).\
                filter(Property.value.in_(values)).subquery()
            else:
                for values in value_list:
                    matching_sentences = db.session.query(
                        PropertyOfSentence.sentence_id.label("sentence_id")).\
                    join(Property, PropertyOfSentence.property_id == Property.id).\
                        filter(Property.name == property_name).\
                        filter(Property.value >= values[0]).\
                        filter(Property.value <= values[1]).\
                        subquery()
            filtered_sentences = filtered_sentences.join(
                matching_sentences,
                Sentence.id == matching_sentences.c.sentence_id)
        return filtered_sentences

    def put(self, id):
        # Deliberate no-op: PUT is not implemented for this view.
        pass


# Route registration: QueryCacheView serves the 'cache' resource under a
# project; pk="query_id" sets the primary-key URL parameter name.
register_rest_view(QueryCacheView,
                   wordseer,
                   'cache_view',
                   'cache',
                   pk="query_id",
                   parents=["project"])
            df = db.session.query(WordCount.document_count).\
                filter(WordCount.word_id == word.id).\
                filter(WordCount.project_id == params["project_id"])[0][0]
            idf = alldocs / df
            row["score_sentences"] = tf * math.log(idf)
            response["Words"].append(row)

        # sort by tf*idf
        response["Words"] = sorted(response["Words"], key=lambda k: k['score_sentences'], reverse=True)

        return jsonify(response)


    def post(self):
        # Deliberate no-op: POST is not implemented for this view.
        pass

    def delete(self, id):
        # Deliberate no-op: DELETE is not implemented for this view.
        pass

    def put(self, id):
        # Deliberate no-op: PUT is not implemented for this view.
        pass


# Route registration: AssociatedWordsView serves the 'associated_word'
# resource under a project.
register_rest_view(
    AssociatedWordsView,
    wordseer,
    'associated_words_view',
    'associated_word',
    parents=["project"],
)
Beispiel #8
0
            literal_column("'phrase'").label("class"),
            func.count(SequenceInSentence.sentence_id).\
                label("sentence_count")
        ).\
        filter(Sequence.project == self.project).\
        filter(SequenceInSentence.sequence_id == Sequence.id).\
        filter(Sequence.lemmatized.in_(lemmatized_vals)).\
        filter(self.sequence_filter).\
        group_by(Sequence.sequence).\
        order_by(desc("sentence_count")).\
        order_by(asc(Sequence.length)).\
        limit(50)

        sequence_list = {}
        for sequence in sequences:
            text = sequence.text.lower()
            # print text
            if text not in sequence_list and text is not None:
                sequence_list[text] = 1
                suggested_sequences.append(sequence._asdict())

        return suggested_sequences

# Route registration: AutoSuggest serves the 'searchsuggestion' resource
# under a project.
register_rest_view(
    AutoSuggest,
    wordseer,
    'searchsuggestions',
    'searchsuggestion',
    parents=["project"]
)
Beispiel #9
0
        results = []
        for sequence in counts:
            results.append(sequence._asdict())
        return jsonify(results=results)

    def post(self):
        # Deliberate no-op: POST is not implemented for this view.
        pass

    def delete(self, id):
        # Deliberate no-op: DELETE is not implemented for this view.
        pass

    def put(self, id):
        # Deliberate no-op: PUT is not implemented for this view.
        pass


# Route registration: SequencesView serves the 'sequence' resource under a
# project.
register_rest_view(
    SequencesView,
    wordseer,
    'sequences_view',
    'sequence',
    parents=["project"],
)

# Route registration: ContainingSequencesView serves the
# 'containing_sequence' resource under a project.
register_rest_view(
    ContainingSequencesView,
    wordseer,
    'containing_sequences_view',
    'containing_sequence',
    parents=["project"],
)
Beispiel #10
0
                elif view_type == "tree":
                    metadata["children"].append(property_value)
            if view_type == "tree":
                results.append(metadata)
        return jsonify(children = results)

    def post(self):
        # Deliberate no-op: POST is not implemented for this view.
        pass

    def delete(self, id):
        # Deliberate no-op: DELETE is not implemented for this view.
        pass

    def put(self, id):
        # Deliberate no-op: PUT is not implemented for this view.
        pass

# Route registration: PropertiesMetaView serves 'meta_property' under
# project, document, and sentence parents.
register_rest_view(
    PropertiesMetaView,
    wordseer,
    'properties_meta_view',
    'meta_property',
    parents=["project", "document", "sentence"],
)

# Route registration: PropertiesView serves 'property' under project,
# document, and sentence parents.
register_rest_view(
    PropertiesView,
    wordseer,
    'properties_view',
    'property',
    parents=["project", "document", "sentence"],
)
                    # need to aggregate any duplicate props
                    # that are associated with different unit_ids
                    for prop in Property.query.filter(
                        Property.name == property.name,
                        Property.value == property.value,
                        Property.project_id == project_id
                    ):
                        total += len(prop.sentences_with_property)

                    values = [total, property.value]
                    values.extend([0] * num_queries)
                    results[property.name][property.value] = values
                results[property.name][property.value][2 + query_index] += 1

    def post(self):
        # Deliberate no-op: POST is not implemented for this view.
        pass

    def delete(self, id):
        # Deliberate no-op: DELETE is not implemented for this view.
        pass

    def put(self, id):
        # Deliberate no-op: PUT is not implemented for this view.
        pass

# Route registration: MetadataFrequenciesView serves 'metadata_frequency'
# under a project.
# NOTE(review): the endpoint name 'metdata_frequencies_view' looks like a
# typo of 'metadata' — confirm no clients depend on it before renaming.
register_rest_view(
    MetadataFrequenciesView,
    wordseer,
    'metdata_frequencies_view',
    'metadata_frequency',
    parents=["project"]
)
Beispiel #12
0
                "is_lemmatized": 1 if is_lemmatized else 0,
                "count": word.sentence_count,
            }
            if "query_id" in params:
                result["id"] = "." + str(word.id) if is_lemmatized else str(
                    word.id)
                result["document_count"] = word.document_count
            else:
                result["id"] = str(word.word_id)

            results.append(result)
        return results

    def post(self):
        # Deliberate no-op: POST is not implemented for this view.
        pass

    def delete(self, id):
        # Deliberate no-op: DELETE is not implemented for this view.
        pass

    def put(self, id):
        # Deliberate no-op: PUT is not implemented for this view.
        pass


# Route registration: WordsView serves the 'word' resource under a project.
register_rest_view(
    WordsView,
    wordseer,
    'words_view',
    'word',
    parents=["project"],
)
Beispiel #13
0
        results = []
        for sequence in counts:
            results.append(sequence._asdict());
        return jsonify(results = results)

    def post(self):
        # Deliberate no-op: POST is not implemented for this view.
        pass

    def delete(self, id):
        # Deliberate no-op: DELETE is not implemented for this view.
        pass

    def put(self, id):
        # Deliberate no-op: PUT is not implemented for this view.
        pass

# Route registration: SequencesView serves the 'sequence' resource under a
# project.
register_rest_view(
    SequencesView,
    wordseer,
    'sequences_view',
    'sequence',
    parents=["project"],
)

# Route registration: ContainingSequencesView serves the
# 'containing_sequence' resource under a project.
register_rest_view(
    ContainingSequencesView,
    wordseer,
    'containing_sequences_view',
    'containing_sequence',
    parents=["project"],
)
Beispiel #14
0
                    PropertyOfSentence.sentence_id.label("sentence_id")).\
                join(Property, PropertyOfSentence.property_id == Property.id).\
                filter(Property.name == property_name).\
                filter(Property.value.in_(values)).subquery()
            else:
                for values in value_list:
                    matching_sentences = db.session.query(
                        PropertyOfSentence.sentence_id.label("sentence_id")).\
                    join(Property, PropertyOfSentence.property_id == Property.id).\
                        filter(Property.name == property_name).\
                        filter(Property.value >= values[0]).\
                        filter(Property.value <= values[1]).\
                        subquery()
            filtered_sentences = filtered_sentences.join(
                matching_sentences,
                Sentence.id == matching_sentences.c.sentence_id)
        return filtered_sentences

    def put(self, id):
        # Deliberate no-op: PUT is not implemented for this view.
        pass


# Route registration: QueryCacheView serves the 'cache' resource under a
# project; pk="query_id" sets the primary-key URL parameter name.
register_rest_view(
    QueryCacheView,
    wordseer,
    'cache_view',
    'cache',
    pk="query_id",
    parents=["project"]
)
Beispiel #15
0
        self.annotation_id = request.args.get("annotation", type=int)
        self.itemtype = request.args.get("itemType")
        self.note_id = request.args.get("noteID", type=int)
        self.text = request.args.get("text")
        self.new_name = request.args.get("newName")
        self.new_parent = request.args.get("newParent")
        self.merge_into = request.args.get("mergeInto")

        result = self.operations[self.operation](self)
        if type(result) == dict:
            return jsonify(result)
        else:
            return jsonify(results = result)

    def post(self):
        # Deliberate no-op: POST is not implemented for this view.
        pass

    def delete(self, id):
        # Deliberate no-op: DELETE is not implemented for this view.
        pass

    def put(self, id):
        # Deliberate no-op: PUT is not implemented for this view.
        pass

# Route registration: SetsView serves the 'set' resource under a project.
register_rest_view(
    SetsView,
    wordseer,
    'sets_view',
    'set',
    parents=["project"],
)
Beispiel #16
0
            for center_string in wordtree_center_strings]
        for sentence in query.sentences:
            left = {"id": sentence.id, "sentence": []}
            right = {"id": sentence.id, "sentence": []}

            tokens = map(lambda x: x.lower(), word_tokenize(sentence.text))
            matched = False
            for center_string_token_list in center_string_token_lists:
                if not matched:
                    for start_index in range(len(tokens) - len(center_string_token_list)):
                        candidate = tokens[start_index : start_index + len(center_string_token_list)]
                        if candidate == center_string_token_list:
                            lefts = tokens[:start_index]
                            lefts.reverse()
                            left["sentence"] = lefts
                            right["sentence"] = tokens[start_index + len(center_string_token_list) : ]
                            matched = True
                            break
            tree_data["concordance"]["lefts"].append(left)
            tree_data["concordance"]["rights"].append(right)

        return jsonify(tree_data)


# Route registration: WordTreeView serves the 'wordtree' resource under a
# project (endpoint and resource share the name 'wordtree').
register_rest_view(
    WordTreeView,
    wordseer,
    'wordtree',
    'wordtree',
    parents=["project"],
)
Beispiel #17
0
            Sequence.lemmatized,
            literal_column("'phrase'").label("class"),
            func.count(SequenceInSentence.sentence_id).\
                label("sentence_count")
        ).\
        filter(Sequence.project == self.project).\
        filter(SequenceInSentence.sequence_id == Sequence.id).\
        filter(Sequence.lemmatized.in_(lemmatized_vals)).\
        filter(self.sequence_filter).\
        group_by(Sequence.sequence).\
        order_by(desc("sentence_count")).\
        order_by(asc(Sequence.length)).\
        limit(50)

        sequence_list = {}
        for sequence in sequences:
            text = sequence.text.lower()
            # print text
            if text not in sequence_list and text is not None:
                sequence_list[text] = 1
                suggested_sequences.append(sequence._asdict())

        return suggested_sequences


# Route registration: AutoSuggest serves the 'searchsuggestion' resource
# under a project.
register_rest_view(AutoSuggest,
                   wordseer,
                   'searchsuggestions',
                   'searchsuggestion',
                   parents=["project"])
Beispiel #18
0
                    total = 0

                    # need to aggregate any duplicate props
                    # that are associated with different unit_ids
                    for prop in Property.query.filter(
                            Property.name == property.name,
                            Property.value == property.value,
                            Property.project_id == project_id):
                        total += len(prop.sentences_with_property)

                    values = [total, property.value]
                    values.extend([0] * num_queries)
                    results[property.name][property.value] = values
                results[property.name][property.value][2 + query_index] += 1

    def post(self):
        # Deliberate no-op: POST is not implemented for this view.
        pass

    def delete(self, id):
        # Deliberate no-op: DELETE is not implemented for this view.
        pass

    def put(self, id):
        # Deliberate no-op: PUT is not implemented for this view.
        pass


# Route registration: MetadataFrequenciesView serves 'metadata_frequency'
# under a project.
# NOTE(review): the endpoint name 'metdata_frequencies_view' looks like a
# typo of 'metadata' — confirm no clients depend on it before renaming.
register_rest_view(MetadataFrequenciesView,
                   wordseer,
                   'metdata_frequencies_view',
                   'metadata_frequency',
                   parents=["project"])
Beispiel #19
0
                    metadata["children"].append(property_value)
            if view_type == "tree":
                results.append(metadata)
        return jsonify(children=results)

    def post(self):
        # Deliberate no-op: POST is not implemented for this view.
        pass

    def delete(self, id):
        # Deliberate no-op: DELETE is not implemented for this view.
        pass

    def put(self, id):
        # Deliberate no-op: PUT is not implemented for this view.
        pass


# Route registration: PropertiesMetaView serves 'meta_property' under
# project, document, and sentence parents.
register_rest_view(
    PropertiesMetaView,
    wordseer,
    'properties_meta_view',
    'meta_property',
    parents=["project", "document", "sentence"],
)

# Route registration: PropertiesView serves 'property' under project,
# document, and sentence parents.
register_rest_view(
    PropertiesView,
    wordseer,
    'properties_view',
    'property',
    parents=["project", "document", "sentence"],
)
Beispiel #20
0
            right = {"id": sentence.id, "sentence": []}

            tokens = map(lambda x: x.lower(), word_tokenize(sentence.text))
            matched = False
            for center_string_token_list in center_string_token_lists:
                if not matched:
                    for start_index in range(
                            len(tokens) - len(center_string_token_list)):
                        candidate = tokens[start_index:start_index +
                                           len(center_string_token_list)]
                        if candidate == center_string_token_list:
                            lefts = tokens[:start_index]
                            lefts.reverse()
                            left["sentence"] = lefts
                            right["sentence"] = tokens[
                                start_index + len(center_string_token_list):]
                            matched = True
                            break
            tree_data["concordance"]["lefts"].append(left)
            tree_data["concordance"]["rights"].append(right)

        return jsonify(tree_data)


# Route registration: WordTreeView serves the 'wordtree' resource under a
# project (endpoint and resource share the name 'wordtree').
register_rest_view(
    WordTreeView,
    wordseer,
    'wordtree',
    'wordtree',
    parents=["project"],
)
Beispiel #21
0
                ]
            )
            html.append(word_html)
        html.append("</span>")
        return "".join(html)

    def add_metadata_properties(self, sentence, result):
        """Copy the sentence's category properties into *result*.

        Only properties whose metadata marks them as a category are added,
        each stored in *result* under its property name.
        """
        result.update({
            prop.name: prop.value
            for prop in sentence.properties
            if prop.property_metadata.is_category
        })

    def make_sentence_dict(self, sentence, matching_words):
        """Package one sentence for the client: rendered HTML under "words"
        and the raw sentence text under "text"."""
        return {
            "words": self.make_sentence_html(sentence, matching_words),
            "text": sentence.text,
        }

    def post(self):
        # Deliberate no-op: POST is not implemented for this view.
        pass

    def delete(self, id):
        # Deliberate no-op: DELETE is not implemented for this view.
        pass

    def put(self, id):
        # Deliberate no-op: PUT is not implemented for this view.
        pass


# Route registration: SentencesView serves the 'sentence' resource, reachable
# under project, document, and unit parents.
register_rest_view(SentencesView, wordseer, "sentences_view", "sentence", parents=["project", "document", "unit"])
Beispiel #22
0
                filter(WordCount.word_id == word.id).\
                filter(WordCount.project_id == params["project_id"])[0][0]
            idf = alldocs / df
            row["score_sentences"] = tf * math.log(idf)
            response["Words"].append(row)

        # sort by tf*idf
        response["Words"] = sorted(response["Words"],
                                   key=lambda k: k['score_sentences'],
                                   reverse=True)

        return jsonify(response)

    def post(self):
        # Deliberate no-op: POST is not implemented for this view.
        pass

    def delete(self, id):
        # Deliberate no-op: DELETE is not implemented for this view.
        pass

    def put(self, id):
        # Deliberate no-op: PUT is not implemented for this view.
        pass


# Route registration: AssociatedWordsView serves the 'associated_word'
# resource under a project.
register_rest_view(
    AssociatedWordsView,
    wordseer,
    'associated_words_view',
    'associated_word',
    parents=["project"],
)
Beispiel #23
0
            info[property.name] = property.value
        info["metadata"] = self.make_unit_metadata(document)
        self.add_unit_info(document, info)

        return jsonify(info)

    def post(self):
        # Deliberate no-op: POST is not implemented for this view.
        pass

    def delete(self, id):
        # Deliberate no-op: DELETE is not implemented for this view.
        pass

    def put(self, id):
        # Deliberate no-op: PUT is not implemented for this view.
        pass


# Route registration: DocumentsView serves the 'document' resource under a
# project.
register_rest_view(
    DocumentsView,
    wordseer,
    'documents_view',
    'document',
    parents=["project"],
)

# Route registration: SingleDocumentView serves 'document_content' under a
# project.
register_rest_view(
    SingleDocumentView,
    wordseer,
    'single_document_view',
    'document_content',
    parents=["project"],
)