Ejemplo n.º 1
0
    def test_model_unit(self):
        """Check that a ``Unit`` keeps its number, sentences and properties.

        A unit is given one sentence and two properties and is then saved.
        The ``title`` property is queried back and the unit it refers to is
        checked for the expected type and number.
        """

        kind_prop = Property(name="unit_type", value="section")
        expected_number = 1

        unit = Unit()

        unit.properties = [kind_prop]
        unit.number = expected_number

        assert unit.number == expected_number

        greeting = Sentence()
        greeting.words = [Word(lemma="hello"), Word(lemma="world")]
        title_prop = Property(name="title", value="Hello World")

        unit.sentences.append(greeting)
        unit.properties.append(title_prop)

        assert unit.sentences == [greeting]
        assert unit.properties.all() == [kind_prop, title_prop]

        unit.save()
        title_prop.save()

        # Look the title property up again through the query interface.
        title_query = Property.query.filter(Property.name == "title")
        title_query = title_query.filter(Property.value == "Hello World")
        fetched = title_query.first()

        assert fetched.unit.type == "unit"
        assert fetched.unit.number == unit.number
Ejemplo n.º 2
0
def load_sentences():
    """Load the content of data/quiz_data.yml into the database.

    The parsed YAML is walked as nested mappings: topic name -> quiz name ->
    question text -> list. The first item of that list is a mapping whose
    ``type`` entry becomes the question's type; every remaining item is
    stored as an answer sentence linked to the question in both directions
    through ``translations``.

    A topic is reused when one with the same name already exists; quizzes
    and sentences are always created. The session is committed once per
    question.
    """
    # Use a context manager so the file handle is released after parsing.
    with open('data/quiz_data.yml') as yaml_file:
        data = ruamel.yaml.load(yaml_file, ruamel.yaml.RoundTripLoader)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(data)

    for topic_name, quizzes in data.items():
        topic = (Topic.query.filter_by(name=topic_name).first()
                 or Topic(name=topic_name))
        print(topic)
        topic.save()

        for quiz_name, sentences in quizzes.items():
            quiz = Quiz(name=quiz_name, topic=topic)
            print(quiz)
            quiz.save()

            for question, answers in sentences.items():
                # The leading entry carries metadata, not an answer; popping
                # it leaves only the answer texts in ``answers``.
                sentence_type = answers.pop(0)['type']

                q = Sentence(type=sentence_type, text=question, quiz=quiz)

                for answer in answers:
                    a = Sentence(text=answer)
                    q.translations.append(a)
                    a.translations.append(q)

                db.session.add(q)
                db.session.commit()
Ejemplo n.º 3
0
    def test_model_unit(self):
        """Check that a ``Unit`` keeps its number, sentences and properties.

        A unit is given one sentence and two properties and is then saved.
        The ``title`` property is queried back and the unit it refers to is
        checked for the expected type and number.
        """

        kind_prop = Property(name="unit_type", value="section")
        expected_number = 1

        unit = Unit()

        unit.properties = [kind_prop]
        unit.number = expected_number

        assert unit.number == expected_number

        greeting = Sentence()
        greeting.words = [Word(word="hello"), Word(word="world")]
        title_prop = Property(name="title", value="Hello World")

        unit.sentences.append(greeting)
        unit.properties.append(title_prop)

        assert unit.sentences == [greeting]
        assert unit.properties.all() == [kind_prop, title_prop]

        unit.save()
        title_prop.save()

        # Look the title property up again through the query interface.
        title_query = Property.query.filter(Property.name == "title")
        title_query = title_query.filter(Property.value == "Hello World")
        fetched = title_query.first()

        assert fetched.unit.type == "unit"
        assert fetched.unit.number == unit.number
Ejemplo n.º 4
0
        def find_sentence(word):
            """Return an example sentence for ``word``.

            When the word already has three or more stored sentences, one of
            them is picked at random. Otherwise the Twinword example endpoint
            is queried, a randomly chosen example that is not yet stored is
            saved as a new ``Sentence`` (flagged ``from_api``) and returned.

            If the API offers no example that is not already stored, a random
            stored sentence of the word is returned instead, or ``None`` when
            the word has none. The previous implementation looped forever in
            that situation.
            """
            if len(word.sentence) >= 3:
                return random.choice(word.sentence).example

            url = "https://twinword-word-graph-dictionary.p.rapidapi.com/example/"
            querystring = {"entry": word.word}
            headers = {
                'x-rapidapi-host':
                "twinword-word-graph-dictionary.p.rapidapi.com",
                'x-rapidapi-key': os.getenv('RAPID_API_KEY')
            }
            response = requests.request("GET",
                                        url,
                                        headers=headers,
                                        params=querystring)
            raw_sentence_response = response.json()

            # Visit the examples in random order, each at most once, so the
            # search is guaranteed to end even if every example is known.
            candidates = list(raw_sentence_response.get("example") or [])
            random.shuffle(candidates)
            for candidate in candidates:
                already_stored = Sentence.query.filter_by(
                    example=candidate).count()
                if already_stored == 0:
                    sentence = Sentence(example=candidate,
                                        from_api=True,
                                        word_id=word.id)
                    sentence.save()
                    return sentence.example

            # Nothing new came back from the API: reuse what is stored.
            if word.sentence:
                return random.choice(word.sentence).example
            return None
Ejemplo n.º 5
0
    def test_add_sequence(self):
        """Exercise ``Sentence.add_sequence()`` and inspect the returned link.
        """

        owning_project = Project()
        host = Sentence(text="foo", project=owning_project)
        seq = Sequence(lemmatized=False)

        # Save order: project, sentence, sequence.
        for record in (owning_project, host, seq):
            record.save()

        link = host.add_sequence(seq, position=1, project=owning_project)

        assert link.sequence == seq
        assert link.sentence == host
        assert link.position == 1
        assert link.project == owning_project
Ejemplo n.º 6
0
    def test_add_sequence(self):
        """Exercise ``Sentence.add_sequence()`` and inspect the returned link.
        """

        host = Sentence(text="foo")
        seq = Sequence(lemmatized=False)
        owning_project = Project()

        # Save order: project, sentence, sequence.
        for record in (owning_project, host, seq):
            record.save()

        link = host.add_sequence(seq, position=1, project=owning_project)

        assert link.sequence == seq
        assert link.sentence == host
        assert link.position == 1
        assert link.project == owning_project
Ejemplo n.º 7
0
    def test_add_word(self):
        """Exercise ``Sentence.add_word()`` and inspect the returned link.
        """

        host = Sentence(text="foo")
        token = Word(word="foo")
        owning_project = Project()

        # Save order: project, sentence, word.
        for record in (owning_project, host, token):
            record.save()

        link = host.add_word(
            token,
            position=4,
            space_before=" ",
            part_of_speech="ADF",
            project=owning_project,
        )

        assert link.word == token
        assert link.sentence == host
        assert link.position == 4
        assert link.space_before == " "
        assert link.part_of_speech == "ADF"
        assert link.project == owning_project
Ejemplo n.º 8
0
def insert_into_db():
    """Store the sentences of every episode, committing once per episode.

    Each link from ``get_all_episode_links()`` is parsed into a season, an
    episode and its sentences. Sentences whose ``text`` is falsy are skipped;
    the rest are stored with surrounding whitespace stripped.
    """
    for link in get_all_episode_links():
        season, episode, parsed = parse_episode(link)
        for item in parsed:
            if not item.text:
                continue
            row = Sentence(season=season,
                           episode=episode,
                           sentence=item.text.strip())
            db.session.add(row)
        db.session.commit()
Ejemplo n.º 9
0
    def test_add_dependency(self):
        """Exercise ``Sentence.add_dependency()`` and inspect the returned link.
        """

        host = Sentence(text="foo")
        token = Word(word="foo")
        dep = Dependency(governor=token)
        owning_project = Project()

        # Save order: project, sentence, dependency, word.
        for record in (owning_project, host, dep, token):
            record.save()

        link = host.add_dependency(
            dep,
            governor_index=1,
            dependent_index=2,
            project=owning_project,
        )

        assert link.dependency == dep
        assert link.sentence == host
        assert link.governor_index == 1
        assert link.dependent_index == 2
        assert link.project == owning_project
Ejemplo n.º 10
0
    def test_add_word(self):
        """Exercise ``Sentence.add_word()`` and inspect the returned link.
        """

        owning_project = Project()
        host = Sentence(text="foo", project=owning_project)
        token = Word(lemma="foo")

        # Save order: project, sentence, word.
        for record in (owning_project, host, token):
            record.save()

        link = host.add_word(
            token, position=4, space_before=" ", project=owning_project)

        assert link.word == token
        assert link.sentence == host
        assert link.position == 4
        assert link.space_before == " "
        assert link.project == owning_project
Ejemplo n.º 11
0
def new_question():
    """Show the new-question form and store the submission once it validates.

    While ``validate_on_submit()`` is false the form template is rendered.
    Otherwise a ``Sentence`` authored by ``current_user`` is committed, a
    success message is flashed and the client is redirected to the
    ``questions`` endpoint.
    """
    form = SentenceForm()

    if not form.validate_on_submit():
        return render_template('create_question.html',
                               title='New Question',
                               form=form,
                               legend='New Question')

    created = Sentence(question=form.question.data,
                       answer=form.answer.data,
                       author=current_user)
    db.session.add(created)
    db.session.commit()
    flash('Your question has been created!', 'success')
    return redirect(url_for('questions'))
Ejemplo n.º 12
0
    def test_add_dependency(self):
        """Exercise ``Sentence.add_dependency()`` and inspect the returned link.
        """

        owning_project = Project()
        host = Sentence(text="foo", project=owning_project)
        token = Word(lemma="foo")
        dep = Dependency(governor=token)

        # Save order: project, sentence, dependency, word.
        for record in (owning_project, host, dep, token):
            record.save()

        link = host.add_dependency(
            dep,
            governor_index=1,
            dependent_index=2,
            project=owning_project,
        )

        assert link.dependency == dep
        assert link.sentence == host
        assert link.governor_index == 1
        assert link.dependent_index == 2
        assert link.project == owning_project
Ejemplo n.º 13
0
    def test_model_sentence(self):
        """Check that a ``Sentence`` holds text, words and related records.

        Text and words are asserted directly. Dependencies, sequences and a
        project are then attached and committed without further assertions,
        so those steps only verify that the operations do not raise.
        """

        expected_text = "hello world"
        sentence = Sentence()
        sentence.text = expected_text

        assert sentence.text == expected_text

        first_word = Word(lemma="hello")
        second_word = Word(lemma="world")

        for token in (first_word, second_word):
            sentence.words.append(token)

        assert sentence.words == [first_word, second_word]

        sentence.save()

        # Attach dependencies.
        first_dep = Dependency()
        second_dep = Dependency()

        sentence.dependencies = [first_dep, second_dep]

        db.session.add_all([first_dep, second_dep])
        db.session.commit()

        # Attach sequences.
        first_seq = Sequence()
        second_seq = Sequence()

        sentence.sequences = [first_seq, second_seq]

        db.session.add_all([first_seq, second_seq])
        db.session.commit()

        # Attach a project.
        owning_project = Project()
        sentence.project = owning_project
        db.session.add_all([owning_project])
        db.session.commit()
Ejemplo n.º 14
0
def read_chinese_sentences():
    """
    Reads sentences from files/sentences.txt and creates Sentence objects.

    The file is looked up under the current working directory. Every
    non-empty line must hold the same number of tab-separated columns; the
    first three are used as simplified characters (passed through
    ``get_chinese``), toned pinyin and the English text. All sentences are
    added to the session and committed together at the end.

    Empty lines, including the one produced by a trailing newline, are
    ignored. An empty file therefore results in an empty commit.

    Raises:
        AssertionError: if the non-empty lines differ in column count.
    """
    location = os.path.join(os.getcwd(), 'files')
    with open(os.path.join(location, 'sentences.txt'), encoding='utf-8') as f:
        raw = f.read()

    # A file ending in "\n" splits into a final empty string; keeping it
    # would make the column-count check fail on otherwise valid input.
    lines = [line for line in raw.split('\n') if line]
    structures = {len(line.split('\t')) for line in lines}
    assert len(structures) <= 1, (
        'sentences.txt has lines with differing column counts: {}'.format(
            sorted(structures)))

    for line in lines:
        data = line.split('\t')
        fields = {
            'zi_simp': get_chinese(data[0]),
            'pinyin_tone': data[1],
            'english': data[2],
        }
        db.session.add(Sentence(**fields))
    db.session.commit()
Ejemplo n.º 15
0
    def test_model_sentence(self):
        """Check that a ``Sentence`` holds text, words and related records.

        Text and words are asserted directly. Dependencies and sequences are
        then attached and committed without further assertions, so those
        steps only verify that the operations do not raise.
        """

        expected_text = "hello world"
        sentence = Sentence()
        sentence.text = expected_text

        assert sentence.text == expected_text

        first_word = Word(word="hello")
        second_word = Word(word="world")

        for token in (first_word, second_word):
            sentence.words.append(token)

        assert sentence.words == [first_word, second_word]

        sentence.save()

        # Attach dependencies.
        first_dep = Dependency()
        second_dep = Dependency()

        sentence.dependencies = [first_dep, second_dep]

        db.session.add_all([first_dep, second_dep])
        db.session.commit()

        # Attach sequences.
        first_seq = Sequence()
        second_seq = Sequence()

        sentence.sequences = [first_seq, second_seq]

        db.session.add_all([first_seq, second_seq])
        db.session.commit()
Ejemplo n.º 16
0
def xmlImport(inputFile):
    """Replace the database content with the sentences found in an XML file.

    All ``Sentence``, ``Entity``, ``Interaction`` and ``OntologyAnnotation``
    rows are deleted and that deletion is committed before the file is
    parsed. Each child of the XML root is then turned into a ``Sentence``
    together with the ``entity`` and ``interaction`` elements found below it;
    every entity additionally receives its ``ontologyAnnotation`` elements.

    Only child elements that have text and whose tag matches a column of the
    corresponding model are used as constructor arguments. A missing
    ``comment`` defaults to the empty string, and an ontology annotation's
    ``default`` is true only when its text is exactly ``'True'`` (a missing
    or empty ``default`` element counts as false).

    Args:
        inputFile: file name or file object accepted by ``ET.parse``.
    """
    def named_parameters(model):
        columns = [m.key for m in model.__table__.columns]
        return columns

    def element_attributes(element, columns):
        # Keep only non-empty children whose tag is a known column name.
        return {
            el.tag: el.text
            for el in element
            if el.text and el.tag in columns
        }

    # Truncate DB
    Sentence.query.delete()
    Entity.query.delete()
    Interaction.query.delete()
    OntologyAnnotation.query.delete()
    db.session.commit()

    # The column names do not change while importing, so look them up once
    # instead of once per XML child element.
    sentence_columns = set(named_parameters(Sentence))
    entity_columns = set(named_parameters(Entity))
    interaction_columns = set(named_parameters(Interaction))
    annotation_columns = set(named_parameters(OntologyAnnotation))

    tree = ET.parse(inputFile)
    root = tree.getroot()
    for sentence in root:
        sentence_attr = element_attributes(sentence, sentence_columns)

        # Entity
        entities = []
        for entity in sentence.iter("entity"):
            entity_attr = element_attributes(entity, entity_columns)

            # Ontology Annotation
            ontologyAnnotations = []
            for ontologyAnnotation in entity.iter("ontologyAnnotation"):
                ontologyAnnotation_attr = element_attributes(
                    ontologyAnnotation, annotation_columns)
                # Elements without text are filtered out above, so use
                # .get() to avoid a KeyError when <default> is absent/empty.
                ontologyAnnotation_attr["default"] = (
                    ontologyAnnotation_attr.get("default") == 'True')
                ontologyAnnotations.append(
                    OntologyAnnotation(**ontologyAnnotation_attr))

            entity_attr.setdefault("comment", "")

            entity_attr["ontologyAnnotations"] = ontologyAnnotations
            entities.append(Entity(**entity_attr))

        # Interaction
        interactions = []
        for interaction in sentence.iter("interaction"):
            interaction_attr = element_attributes(interaction,
                                                  interaction_columns)
            interaction_attr.setdefault("comment", "")
            interactions.append(Interaction(**interaction_attr))

        sentence_attr["entities"] = entities
        sentence_attr["interactions"] = interactions
        sentence_attr.setdefault("comment", "")

        s = Sentence(**sentence_attr)

        db.session.add(s)

    db.session.commit()
    def tweetSentimentAnalysis(self):
        """Score the tweets in testTweets.txt and record the results.

        For every non-empty line of ``testTweets.txt`` the method:

        * prints the tweet and its cleaned token list (lower-cased, English
          stop words and punctuation removed, lemmatized),
        * fits a one-topic LDA model on that single tweet and keeps the
          topic's top three terms with weights and punctuation stripped,
        * obtains positive/negative counts from ``self.posNegCount`` and
          writes the tweet with its ``positive - negative`` score to
          ``testTweetsLineByLine.txt``,
        * obtains ``self.nerfun(tweet)`` and collects a ``Sentence`` holding
          all of the above.

        Afterwards the totals are submitted to a ``Blockchain`` as a topic
        whose id is the current timestamp, followed by every collected
        sentence tagged with that topic id.

        Returns:
            dict with the keys ``positive``, ``negative``, ``sentiment``
            (``"POSITIVE"``, ``"NEGATIVE"`` or ``"NEUTRAL"``), ``valuesSum``
            (positive plus negative) and ``neural`` (number of lines in the
            input, empty ones included, minus ``valuesSum``).
        """
        import re

        # Read all the tweets from "testTweets.txt" and split them into
        # lines; the context manager closes the handle right away.
        with codecs.open("testTweets.txt", 'r', "utf-8") as tweets_file:
            tweets_list = tweets_file.read().split('\n')

        # These helpers do not depend on the tweet, so build them once
        # instead of once per loop iteration.
        stop = set(stopwords.words('english'))
        exclude = set(string.punctuation)
        lemma = WordNetLemmatizer()

        def clean(data):
            stop_free = " ".join(
                [i for i in data.lower().split() if i not in stop])
            punc_free = ''.join(ch for ch in stop_free
                                if ch not in exclude)
            normalized = " ".join(
                lemma.lemmatize(word) for word in punc_free.split())
            return normalized

        Lda = gensim.models.ldamodel.LdaModel

        positive_counter = 0
        negative_counter = 0
        sentences = []
        # The with-block guarantees the output file is closed even when one
        # of the per-tweet steps raises.
        with codecs.open("testTweetsLineByLine.txt", 'w',
                         "utf-8") as outFile:
            for tweet in tweets_list:
                if not tweet:
                    continue
                print(tweet)

                # Topic modelling on the single cleaned tweet.
                tweet_clean = [clean(tweet).split()]
                print(tweet_clean)

                dictionary = corpora.Dictionary(tweet_clean)
                doc_term_matrix = [
                    dictionary.doc2bow(doc) for doc in tweet_clean
                ]

                ldamodel = Lda(doc_term_matrix,
                               num_topics=1,
                               id2word=dictionary,
                               passes=50)
                topics = ldamodel.print_topics(num_topics=1, num_words=3)
                # The topic description joins its terms with "+"; strip
                # everything but the term text from each part.
                topicModelingValues = []
                for word in topics[0][1].split("+"):
                    word = re.sub(r'[?|$|.|!]', r'', word)
                    word = re.sub(r'[^a-zA-Z0-9 ]', r'', word)
                    result = ''.join(
                        [ch for ch in word if not ch.isdigit()])
                    topicModelingValues.append(result)

                # Call posNegCount() on the tweet and increment
                # positive_counter and negative_counter accordingly.
                p, n = self.posNegCount(tweet)
                positive_counter += p
                negative_counter += n
                temp = p - n
                dataToBeSaved = "tweet with idx : --- {}  --- \n , have the following postivity : {} \n\n".format(
                    tweet, temp)
                outFile.write(dataToBeSaved)
                ner = self.nerfun(tweet)
                if p > n:
                    sentiment_sent = "POSITIVE"
                elif p < n:
                    sentiment_sent = "NEGATIVE"
                else:
                    sentiment_sent = "NEUTRAL"
                ts = time.time()
                # "temp" is a placeholder topic id; the real one is set
                # below once the topic has been created.
                sentences.append(
                    Sentence(ts, tweet, sentiment_sent,
                             str(datetime.datetime.now()), ner,
                             topicModelingValues, "temp"))

        if positive_counter > negative_counter:
            sentiment = "POSITIVE"
        elif positive_counter < negative_counter:
            sentiment = "NEGATIVE"
        else:
            sentiment = "NEUTRAL"

        valuesSum = positive_counter + negative_counter
        neural_counter = len(tweets_list) - valuesSum
        output = {
            'positive': positive_counter,
            'negative': negative_counter,
            'sentiment': sentiment,
            'valuesSum': valuesSum,
            'neural': neural_counter
        }

        blockchain = Blockchain()
        ts = time.time()
        blockchain.add_topic(id=ts,
                             keyword=self.keyword,
                             sentiment_result=output,
                             date=str(datetime.datetime.now()))
        topicId = str(ts)
        for sent in sentences:
            sent.topicId = topicId
            blockchain.add_sentence(sent.id, sent.content,
                                    sent.sentiment_result, sent.date, sent.ner,
                                    sent.topicModelingValues, sent.topicId)

        print(
            "//////////////////////////////////////////////////////////////////////////"
        )
        print("positive_counter:", positive_counter, "negative_counter:",
              negative_counter, "valuesSum", valuesSum)
        print(
            "//////////////////////////////////////////////////////////////////////////"
        )

        return output
Ejemplo n.º 18
0
def createBlock(blockLines):
    """Store one tab-separated annotation block as a Sentence with children.

    The first line of the block is the head line: its second column is
    ``<pubmed id>__<sentence id>``, the third the sentence text and the
    fourth a numeric score. Every following line describes either an
    interaction or one or two entities:

        PATTERN\\tstart end\\tinteraction_type
        PROTEIN_EXACT\\tstart end\\tword\\tdatabase id
        PROTEIN_GENIA\\tstart end\\tword\\tPROTEIN_REFLECT\\tstart end\\tword\\tdatabase id

    The last form (a protein found by software other than ``EXACT``) yields
    two entities sharing the database id in the seventh column. All records
    get ``defaultGrade`` and ``defaultComment`` and are committed together
    at the end.

    Args:
        blockLines: list of strings, head line first.

    Raises:
        RuntimeError: if an annotation line cannot be processed. The message
            names the original exception type and the line's components.
            (The previous code tried to raise a plain string, which itself
            fails with ``TypeError`` and hides this message.)
    """
    headLineComponents = blockLines[0].split("\t")

    ids = headLineComponents[1].split("__")
    pmid = ids[0]
    sentenceID = ids[1]
    sentence = headLineComponents[2]
    score = float(headLineComponents[3])

    # add Sentence
    s = Sentence(pubmedID=pmid,
                 sentenceID=sentenceID,
                 literal=sentence,
                 score=score,
                 grade=defaultGrade,
                 comment=defaultComment)
    db.session.add(s)

    for line in blockLines[1:]:
        lineComponents = line.split("\t")

        try:
            kind = lineComponents[0]

            if kind == "PATTERN":
                start, end = startEnd(lineComponents[1])
                interactionType = lineComponents[2]

                # add Interaction
                i = Interaction(type=interactionType,
                                start=start,
                                end=end,
                                sentence=s,
                                grade=defaultGrade,
                                comment=defaultComment)
                db.session.add(i)
            else:
                # Rewrite the two kind spellings to the form handed to
                # typeSoftware().
                entityType, software = typeSoftware(
                    kind.replace("GO_PROCESS",
                                 "GO-PROCESS").replace("CHEM", "CHEMICAL"))

                if (entityType.upper() != "PROTEIN"
                        or software.upper() == "EXACT"):
                    start, end = startEnd(lineComponents[1])
                    name = lineComponents[2]
                    databaseID = lineComponents[3]

                    # add Entity
                    e = Entity(type=entityType,
                               software=software,
                               name=name,
                               databaseID=databaseID,
                               start=start,
                               end=end,
                               sentence=s,
                               grade=defaultGrade,
                               comment=defaultComment)
                    db.session.add(e)
                else:
                    # partial overlap: two findings share one database id
                    databaseID = lineComponents[6]

                    type1, software1 = typeSoftware(lineComponents[0])
                    start1, end1 = startEnd(lineComponents[1])
                    name1 = lineComponents[2]

                    # add Entity
                    e1 = Entity(type=type1,
                                software=software1,
                                name=name1,
                                databaseID=databaseID,
                                start=start1,
                                end=end1,
                                sentence=s,
                                grade=defaultGrade,
                                comment=defaultComment)
                    db.session.add(e1)

                    type2, software2 = typeSoftware(lineComponents[3])
                    start2, end2 = startEnd(lineComponents[4])
                    name2 = lineComponents[5]

                    # add Entity
                    e2 = Entity(type=type2,
                                software=software2,
                                name=name2,
                                databaseID=databaseID,
                                start=start2,
                                end=end2,
                                sentence=s,
                                grade=defaultGrade,
                                comment=defaultComment)
                    db.session.add(e2)

        except Exception:
            # Only exception instances can be raised; wrap the message in a
            # real exception so it actually reaches the caller.
            raise RuntimeError("Error: {}\n\nLine Components: {}".format(
                sys.exc_info()[0], lineComponents))

    db.session.commit()