def test_model_unit(self):
    """Verify basic ``Unit`` behaviour: property assignment, numbering,
    sentence membership, persistence, and the back-reference from a
    retrieved ``Property``.
    """
    section_property = Property(name="unit_type", value="section")
    expected_number = 1

    unit = Unit()
    unit.properties = [section_property]
    unit.number = expected_number
    assert unit.number == expected_number

    sentence = Sentence()
    sentence.words = [Word(lemma="hello"), Word(lemma="world")]
    title_property = Property(name="title", value="Hello World")
    unit.sentences.append(sentence)
    unit.properties.append(title_property)

    assert unit.sentences == [sentence]
    assert unit.properties.all() == [section_property, title_property]

    unit.save()
    title_property.save()

    fetched = (Property.query
               .filter(Property.name == "title")
               .filter(Property.value == "Hello World")
               .first())
    assert fetched.unit.type == "unit"
    assert fetched.unit.number == unit.number
def load_sentences():
    """Load the content of data/quiz_data.yml into the English / Latin tables.

    For every topic in the YAML file this reuses (or creates) a ``Topic``,
    creates one ``Quiz`` per quiz name, and one ``Sentence`` per question,
    linking each answer sentence to its question bidirectionally through
    ``translations``.
    """
    # Context manager ensures the file handle is closed (original left it open).
    with open('data/quiz_data.yml') as yaml_file:
        data = ruamel.yaml.load(yaml_file, ruamel.yaml.RoundTripLoader)
    print(data)  # fixed: original used Python-2 ``print data`` statements
    # Renamed loop variable (was ``quiz``): the original rebound it inside
    # its own iteration, shadowing the dict being iterated.
    for topic_name, quizzes in data.items():
        topic = (Topic.query.filter_by(name=topic_name).first()
                 or Topic(name=topic_name))
        print(topic)
        topic.save()
        for quiz_name, sentences in quizzes.items():
            quiz = Quiz(name=quiz_name, topic=topic)
            print(quiz)
            quiz.save()
            for question, answers in sentences.items():
                # First list element carries the question's type metadata;
                # renamed from ``type`` to avoid shadowing the builtin.
                question_type = answers.pop(0)['type']
                q = Sentence(type=question_type, text=question, quiz=quiz)
                for answer in answers:
                    a = Sentence(text=answer)
                    # Question and answer reference each other as translations.
                    q.translations.append(a)
                    a.translations.append(q)
                db.session.add(q)
    # Single commit after all rows are queued.
    db.session.commit()
def test_model_unit(self):
    """Exercise ``Unit``: property list handling, the ``number`` attribute,
    attached sentences, saving, and lookup through a stored ``Property``.
    """
    kind = Property(name="unit_type", value="section")
    num = 1
    unit = Unit()
    unit.properties = [kind]
    unit.number = num
    assert unit.number == num

    greeting = Sentence()
    greeting.words = [Word(word="hello"), Word(word="world")]
    title = Property(name="title", value="Hello World")
    unit.sentences.append(greeting)
    unit.properties.append(title)
    assert unit.sentences == [greeting]
    assert unit.properties.all() == [kind, title]

    unit.save()
    title.save()

    query = Property.query.filter(Property.name == "title")
    query = query.filter(Property.value == "Hello World")
    found = query.first()
    assert found.unit.type == "unit"
    assert found.unit.number == unit.number
def find_sentence(word):
    """Return an example sentence string for *word*.

    If fewer than 3 examples are stored for the word, fetch a fresh one from
    the Twinword dictionary API, persist it, and return it; otherwise return
    a random cached example.
    """
    if len(word.sentence) < 3:
        url = "https://twinword-word-graph-dictionary.p.rapidapi.com/example/"
        querystring = {"entry": word.word}
        headers = {
            'x-rapidapi-host': "twinword-word-graph-dictionary.p.rapidapi.com",
            # API key comes from the environment; request fails if unset.
            'x-rapidapi-key': os.getenv('RAPID_API_KEY')
        }
        response = requests.request("GET", url, headers=headers,
                                    params=querystring)
        raw_sentence_response = response.json()
        # Draw random examples until one is found that is not already stored.
        # NOTE(review): loops forever if every returned example already
        # exists in the DB, and raises if the "example" list is empty —
        # confirm the API always returns unseen examples.
        while True:
            index_choice = random.choice(
                range(len(raw_sentence_response["example"])))
            sentence_response = raw_sentence_response["example"][
                index_choice]
            if Sentence.query.filter_by(
                    example=sentence_response).count() == 0:
                break
        sentence = Sentence(example=sentence_response,
                            from_api=True,
                            word_id=word.id)
        sentence.save()
        return sentence.example
    else:
        # Enough cached examples: pick one at random.
        return random.choice(word.sentence).example
def test_add_sequence(self):
    """Check that ``Sentence.add_sequence()`` returns a relation carrying
    the sequence, the sentence, the position, and the project.
    """
    owning_project = Project()
    target_sentence = Sentence(text="foo", project=owning_project)
    new_sequence = Sequence(lemmatized=False)

    owning_project.save()
    target_sentence.save()
    new_sequence.save()

    relation = target_sentence.add_sequence(new_sequence, position=1,
                                            project=owning_project)

    assert relation.sequence == new_sequence
    assert relation.sentence == target_sentence
    assert relation.position == 1
    assert relation.project == owning_project
def test_add_sequence(self):
    """``Sentence.add_sequence()`` should produce a relation recording the
    sequence, sentence, position, and project it was called with.
    """
    sent = Sentence(text="foo")
    seq = Sequence(lemmatized=False)
    proj = Project()

    proj.save()
    sent.save()
    seq.save()

    rel = sent.add_sequence(seq, position=1, project=proj)

    assert rel.sequence == seq
    assert rel.sentence == sent
    assert rel.position == 1
    assert rel.project == proj
def test_add_word(self):
    """``Sentence.add_word()`` should return a relation holding the word,
    sentence, position, spacing, part of speech, and project.
    """
    sent = Sentence(text="foo")
    new_word = Word(word="foo")
    proj = Project()

    proj.save()
    sent.save()
    new_word.save()

    rel = sent.add_word(
        new_word,
        position=4,
        space_before=" ",
        part_of_speech="ADF",
        project=proj,
    )

    assert rel.word == new_word
    assert rel.sentence == sent
    assert rel.position == 4
    assert rel.space_before == " "
    assert rel.part_of_speech == "ADF"
    assert rel.project == proj
def insert_into_db():
    """Scrape every episode and persist each non-empty sentence as a
    ``Sentence`` row tagged with its season and episode.
    """
    for episode_link in get_all_episode_links():
        season, episode, sentences = parse_episode(episode_link)
        for sentence in sentences:
            # Skip parsed nodes with no text content.
            if sentence.text:
                sen = Sentence(
                    season=season,
                    episode=episode,
                    sentence=sentence.text.strip()
                )
                db.session.add(sen)
    # NOTE(review): commit placement reconstructed from collapsed source —
    # a single commit after all episodes; confirm against the original file.
    db.session.commit()
def test_add_dependency(self):
    """``Sentence.add_dependency()`` should link a dependency to the
    sentence with the given governor/dependent indices and project.
    """
    sent = Sentence(text="foo")
    governor_word = Word(word="foo")
    dep = Dependency(governor=governor_word)
    proj = Project()

    proj.save()
    sent.save()
    dep.save()
    governor_word.save()

    relation = sent.add_dependency(dep, governor_index=1, dependent_index=2,
                                   project=proj)

    assert relation.dependency == dep
    assert relation.sentence == sent
    assert relation.governor_index == 1
    assert relation.dependent_index == 2
    assert relation.project == proj
def test_add_word(self):
    """The relation returned by ``Sentence.add_word()`` must carry the
    word, sentence, position, spacing, and project.
    """
    owner = Project()
    target = Sentence(text="foo", project=owner)
    token = Word(lemma="foo")

    owner.save()
    target.save()
    token.save()

    link = target.add_word(token, position=4, space_before=" ",
                           project=owner)

    assert link.word == token
    assert link.sentence == target
    assert link.position == 4
    assert link.space_before == " "
    assert link.project == owner
def new_question():
    """Show the question-creation form; on a valid submission persist the
    new question and redirect to the questions listing.
    """
    form = SentenceForm()
    if not form.validate_on_submit():
        # GET request or invalid submission: render the form again.
        return render_template('create_question.html', title='New Question',
                               form=form, legend='New Question')

    question = Sentence(question=form.question.data,
                        answer=form.answer.data,
                        author=current_user)
    db.session.add(question)
    db.session.commit()
    flash('Your question has been created!', 'success')
    return redirect(url_for('questions'))
def test_add_dependency(self):
    """The relation from ``Sentence.add_dependency()`` must record the
    dependency, sentence, both indices, and the project.
    """
    owner = Project()
    target = Sentence(text="foo", project=owner)
    governor = Word(lemma="foo")
    dep = Dependency(governor=governor)

    owner.save()
    target.save()
    dep.save()
    governor.save()

    link = target.add_dependency(dep, governor_index=1, dependent_index=2,
                                 project=owner)

    assert link.dependency == dep
    assert link.sentence == target
    assert link.governor_index == 1
    assert link.dependent_index == 2
    assert link.project == owner
def test_model_sentence(self):
    """Exercise ``Sentence``: text storage, word ordering, and its
    relationships with dependencies, sequences, and a project.
    """
    greeting = "hello world"
    sentence = Sentence()
    sentence.text = greeting
    assert sentence.text == greeting

    hello = Word(lemma="hello")
    world = Word(lemma="world")
    sentence.words.append(hello)
    sentence.words.append(world)
    assert sentence.words == [hello, world]
    sentence.save()

    # Test with Dependencies
    deps = [Dependency(), Dependency()]
    sentence.dependencies = deps
    db.session.add_all(deps)
    db.session.commit()

    # Test with Sequences
    seqs = [Sequence(), Sequence()]
    sentence.sequences = seqs
    db.session.add_all(seqs)
    db.session.commit()

    # Test with Project
    project = Project()
    sentence.project = project
    db.session.add_all([project])
    db.session.commit()
def read_chinese_sentences():
    """ Reads sentences from txt and creates Sentence objects """
    # Expects files/sentences.txt relative to the current working directory.
    location = os.path.join(os.getcwd(), 'files')
    with open(os.path.join(location, 'sentences.txt'), encoding='utf-8') as f:
        raw = f.read()
    lines = raw.split('\n')
    # Sanity check: every line must have the same number of tab-separated
    # fields. NOTE(review): a trailing newline yields an empty final line
    # with one field, which would trip this assert — confirm file format.
    structures = set(len(l.split('\t')) for l in lines)
    assert len(structures) == 1
    for l in lines:
        data = l.split('\t')
        # Columns: simplified characters, pinyin with tones, English gloss.
        fields = {
            'zi_simp': get_chinese(data[0]),
            'pinyin_tone': data[1],
            'english': data[2],
        }
        s = Sentence(**fields)
        db.session.add(s)
    # NOTE(review): commit placement reconstructed from collapsed source.
    db.session.commit()
def test_model_sentence(self):
    """Verify ``Sentence`` text handling, word ordering, and its
    dependency/sequence relationships.
    """
    phrase = "hello world"
    sent = Sentence()
    sent.text = phrase
    assert sent.text == phrase

    first = Word(word="hello")
    second = Word(word="world")
    sent.words.append(first)
    sent.words.append(second)
    assert sent.words == [first, second]
    sent.save()

    # Test with Dependencies
    dependencies = [Dependency(), Dependency()]
    sent.dependencies = dependencies
    db.session.add_all(dependencies)
    db.session.commit()

    # Test with Sequences
    sequences = [Sequence(), Sequence()]
    sent.sequences = sequences
    db.session.add_all(sequences)
    db.session.commit()
def xmlImport(inputFile):
    """Replace the database contents with sentences (and their nested
    entities, ontology annotations, and interactions) parsed from *inputFile*.

    WARNING: truncates the Sentence, Entity, Interaction, and
    OntologyAnnotation tables before importing.
    """

    def named_parameters(model):
        # Column names of the model's table — used to keep only XML child
        # elements that map onto real model attributes.
        columns = [m.key for m in model.__table__.columns]
        return columns

    # Truncate DB
    Sentence.query.delete()
    Entity.query.delete()
    Interaction.query.delete()
    OntologyAnnotation.query.delete()
    db.session.commit()

    tree = ET.parse(inputFile)
    root = tree.getroot()
    for sentence in root:
        # Keep only child elements whose tag matches a Sentence column.
        sentence_attr = {
            el.tag: el.text
            for el in sentence
            if el.text and el.tag in named_parameters(Sentence)
        }

        # Entity
        entities = []
        for entity in sentence.iter("entity"):
            entity_attr = {
                el.tag: el.text
                for el in entity
                if el.text and el.tag in named_parameters(Entity)
            }

            # Ontology Annotation
            ontologyAnnotations = []
            for ontologyAnnotation in entity.iter("ontologyAnnotation"):
                ontologyAnnotation_attr = {
                    el.tag: el.text
                    for el in ontologyAnnotation
                    if el.text and el.tag in named_parameters(OntologyAnnotation)
                }
                # XML stores booleans as text: convert "True"/"False".
                ontologyAnnotation_attr["default"] = (
                    ontologyAnnotation_attr["default"] == 'True')
                ontologyAnnotations.append(
                    OntologyAnnotation(**ontologyAnnotation_attr))

            # A missing comment defaults to the empty string.
            if not "comment" in entity_attr:
                entity_attr["comment"] = ""
            entity_attr["ontologyAnnotations"] = ontologyAnnotations
            entities.append(Entity(**entity_attr))

        # Interaction
        interactions = []
        for interaction in sentence.iter("interaction"):
            interaction_attr = {
                el.tag: el.text
                for el in interaction
                if el.text and el.tag in named_parameters(Interaction)
            }
            if not "comment" in interaction_attr:
                interaction_attr["comment"] = ""
            interactions.append(Interaction(**interaction_attr))

        sentence_attr["entities"] = entities
        sentence_attr["interactions"] = interactions
        if not "comment" in sentence_attr:
            sentence_attr["comment"] = ""
        s = Sentence(**sentence_attr)
        db.session.add(s)
    # NOTE(review): commit placement reconstructed from collapsed source.
    db.session.commit()
def tweetSentimentAnalysis(self):
    """Run per-tweet topic modelling, sentiment scoring, and NER over the
    tweets in testTweets.txt, persist each tweet and an aggregate topic to
    the blockchain, and return the aggregate sentiment counters.
    """
    outFile = codecs.open("testTweetsLineByLine.txt", 'w', "utf-8")
    # Read all the tweets from "testTweets.txt" and
    # split + store them to tweets_list
    tweets = codecs.open("testTweets.txt", 'r', "utf-8").read()
    tweets_list = tweets.split('\n')
    #tweets.close() - AttributeError: 'str' object has no attribute 'close'
    positive_counter = 0
    negative_counter = 0
    sentences = []
    #implementation of topic modelling
    for idx, tweet in enumerate(tweets_list):
        # Skip empty lines.
        if (len(tweet)):
            print(tweet)
            stop = set(stopwords.words('english'))
            exclude = set(string.punctuation)
            lemma = WordNetLemmatizer()

            def clean(data):
                # Lowercase, drop stopwords and punctuation, lemmatize.
                stop_free = " ".join(
                    [i for i in data.lower().split() if i not in stop])
                punc_free = ''.join(ch for ch in stop_free
                                    if ch not in exclude)
                normalized = " ".join(
                    lemma.lemmatize(word) for word in punc_free.split())
                return normalized

            tweet_clean = [clean(tweet).split()]
            print(tweet_clean)
            # Single-document, single-topic LDA just to extract 3 keywords.
            dictionary = corpora.Dictionary(tweet_clean)
            doc_term_matrix = [
                dictionary.doc2bow(doc) for doc in tweet_clean
            ]
            Lda = gensim.models.ldamodel.LdaModel
            ldamodel = Lda(doc_term_matrix,
                           num_topics=1,
                           id2word=dictionary,
                           passes=50)
            ldamodel = ldamodel.print_topics(num_topics=1, num_words=3)
            words = ldamodel[0][1]
            # print_topics yields "w1*prob + w2*prob + ..." — split on '+'.
            words = words.split("+")
            import re
            topicModelingValues = []
            for word in words:
                # Strip punctuation, non-alphanumerics, and digits from
                # each topic term before storing it.
                word = re.sub(r'[?|$|.|!]', r'', word)
                word = re.sub(r'[^a-zA-Z0-9 ]', r'', word)
                result = ''.join(
                    [word for word in word if not word.isdigit()])
                topicModelingValues.append(result)
            # Call outputo`unt() on each tweet stored in tweets_list and
            # increment positive_counter and negative_counter accordingly
            p, n = self.posNegCount(tweet)
            positive_counter += p
            negative_counter += n
            temp = p - n
            dataToBeSaved = "tweet with idx : --- {} --- \n , have the following postivity : {} \n\n".format(
                tweet, temp)
            #print(dataToBeSaved.encode("utf-8"))
            outFile.write(dataToBeSaved)
            ner = self.nerfun(tweet)
            # Per-tweet sentiment label from the positive/negative counts.
            if p > n:
                sentiment_sent = "POSITIVE"
            elif p < n:
                sentiment_sent = "NEGATIVE"
            else:
                sentiment_sent = "NEUTRAL"
            ts = time.time()
            sentences.append(
                Sentence(ts, tweet, sentiment_sent,
                         str(datetime.datetime.now()), ner,
                         topicModelingValues, "temp"))
    outFile.close()
    # Aggregate sentiment over all tweets.
    if positive_counter > negative_counter:
        sentiment = "POSITIVE"
    elif positive_counter < negative_counter:
        sentiment = "NEGATIVE"
    else:
        sentiment = "NEUTRAL"
    valuesSum = positive_counter + negative_counter
    # Tweets contributing neither positive nor negative hits.
    neural_counter = len(tweets_list) - valuesSum
    output = {
        'positive': positive_counter,
        'negative': negative_counter,
        'sentiment': sentiment,
        'valuesSum': valuesSum,
        'neural': neural_counter
    }
    # Record the topic and each analysed tweet on the blockchain; the
    # topic id is the timestamp used when the topic was added.
    blockchain = Blockchain()
    ts = time.time()
    call = blockchain.add_topic(id=ts,
                                keyword=self.keyword,
                                sentiment_result=output,
                                date=str(datetime.datetime.now()))
    topicId = str(ts)
    for sent in sentences:
        sent.topicId = topicId
        blockchain.add_sentence(sent.id, sent.content, sent.sentiment_result,
                                sent.date, sent.ner, sent.topicModelingValues,
                                sent.topicId)
    print(
        "//////////////////////////////////////////////////////////////////////////"
    )
    print("positive_counter:", positive_counter, "negative_counter:",
          negative_counter, "valuesSum", valuesSum)
    print(
        "//////////////////////////////////////////////////////////////////////////"
    )
    # Hopefully, this is self-explanatory
    return (output)
def createBlock(blockLines):
    """Persist one annotated text block: a sentence plus its entities and
    interactions.

    ``blockLines[0]`` is a tab-separated header
    (``...\\tpmid__sentenceID\\tsentence\\tscore``); each following line is
    either a PATTERN interaction, a single entity annotation, or a pair of
    partially overlapping entity annotations sharing one database id.

    Raises RuntimeError (chained to the original exception) if a line
    cannot be parsed.
    """
    headLineComponents = blockLines[0].split("\t")
    ids = headLineComponents[1].split("__")
    pmid = ids[0]
    sentenceID = ids[1]
    sentence = headLineComponents[2]
    score = float(headLineComponents[3])
    # add Sentence
    s = Sentence(pubmedID=pmid,
                 sentenceID=sentenceID,
                 literal=sentence,
                 score=score,
                 grade=defaultGrade,
                 comment=defaultComment)
    db.session.add(s)
    for line in blockLines[1:]:
        lineComponents = line.split("\t")
        # Possible forms:
        # PROTEIN_EXACT\tstart end\tword\tdatabase id
        # PROTEIN_GENIA\tstart end\tword\tPROTEIN_REFLECT\tstart end\tword\tdatabase id
        # PATTERN\tstart end\tinteraction_type
        try:
            kind = lineComponents[0]
            if kind == "PATTERN":
                start, end = startEnd(lineComponents[1])
                # Renamed from ``type`` to avoid shadowing the builtin.
                interaction_type = lineComponents[2]
                # add Interaction
                i = Interaction(type=interaction_type,
                                start=start,
                                end=end,
                                sentence=s,
                                grade=defaultGrade,
                                comment=defaultComment)
                db.session.add(i)
            else:
                # Normalize tag spellings before splitting into type/software.
                entity_type, software = typeSoftware(
                    kind.replace("GO_PROCESS",
                                 "GO-PROCESS").replace("CHEM", "CHEMICAL"))
                if entity_type.upper() != "PROTEIN" or software.upper() == "EXACT":
                    start, end = startEnd(lineComponents[1])
                    name = lineComponents[2]
                    databaseID = lineComponents[3]
                    # add Entity
                    e = Entity(type=entity_type,
                               software=software,
                               name=name,
                               databaseID=databaseID,
                               start=start,
                               end=end,
                               sentence=s,
                               grade=defaultGrade,
                               comment=defaultComment)
                    db.session.add(e)
                else:
                    # partial overlap: two entity mentions share one database id
                    databaseID = lineComponents[6]
                    type1, software1 = typeSoftware(lineComponents[0])
                    start1, end1 = startEnd(lineComponents[1])
                    name1 = lineComponents[2]
                    # add Entity
                    e1 = Entity(type=type1,
                                software=software1,
                                name=name1,
                                databaseID=databaseID,
                                start=start1,
                                end=end1,
                                sentence=s,
                                grade=defaultGrade,
                                comment=defaultComment)
                    db.session.add(e1)
                    type2, software2 = typeSoftware(lineComponents[3])
                    start2, end2 = startEnd(lineComponents[4])
                    name2 = lineComponents[5]
                    # add Entity
                    e2 = Entity(type=type2,
                                software=software2,
                                name=name2,
                                databaseID=databaseID,
                                start=start2,
                                end=end2,
                                sentence=s,
                                grade=defaultGrade,
                                comment=defaultComment)
                    db.session.add(e2)
        except Exception as exc:
            # Fixed: the original used a bare ``except:`` and then
            # ``raise "Error..."`` — raising a string is itself a TypeError
            # in Python 3, so the diagnostic message was lost. Raise a real
            # exception and chain the cause instead.
            raise RuntimeError("Error: {}\n\nLine Components: {}".format(
                sys.exc_info()[0], lineComponents)) from exc
    db.session.commit()