Example 1
def run_news_through_filter(news):

    print('Filtering news...')

    filtered = []

    news = [random.choice(news)]

    for item in news:
        headline = item['title'].split()

        altered_sentence = []

        for word in headline:
            w = Word(word)
            synonym = w.synonyms(relevance=1)

            if len(synonym) == 0:
                word_to_use = word
            else:
                word_to_use = random.choice(synonym)

            altered_sentence.append(word_to_use)

        altered_headline = ' '.join(altered_sentence)
        tmp = {'altered_title': altered_headline, 'url': item['url']}
        filtered.append(tmp)

    return filtered[0]
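
A minimal usage sketch, assuming `random` and `thesaurus.Word` are imported at module level and that `news` is a list of dicts with 'title' and 'url' keys (the sample item is hypothetical):

import random
from thesaurus import Word

news = [{'title': 'Markets rally on strong earnings', 'url': 'https://example.com/a'}]
print(run_news_through_filter(news))
# e.g. {'altered_title': 'Markets rally along muscular earnings', 'url': 'https://example.com/a'}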
Example 2

    def get_word_to_synonyms_dict(self, n, text, tokenized_text):
        word_to_syns_dict = {}
        word_to_pos = self.get_important_words(tokenized_text)    

        for w in tokenized_text:
            
            if w in word_to_pos:
                list_of_syns_for_w = []
                original_synset = lesk(text, w)
                if original_synset:
                    word = Word(w)
                    p_o_s = pos_dict_thesaurus[word_to_pos[w]]
                    syns = word.synonyms('all', partOfSpeech=p_o_s)
                    flat_list = [item for sublist in syns for item in sublist]
                    for candidate_syn in flat_list:
                        candidate_synsets = wordnet.synsets(candidate_syn, pos=pos_dict[word_to_pos[w]])
                        if len(candidate_synsets) > 0:
                            sims = (original_synset.wup_similarity(x) for x in candidate_synsets)
                            list_sims = [sim for sim in sims if sim]
                            if len(list_sims) > 0:
                                maxSim = max(list_sims)
                                list_of_syns_for_w.append((candidate_syn, maxSim))
                    if list_of_syns_for_w:
                        list_of_syns_for_w.sort(key=lambda x: x[1], reverse=True)
                        n_truncate = min(n, len(list_of_syns_for_w))
                        word_to_syns_dict[(w, word_to_pos[w])] = list_of_syns_for_w[:n_truncate]
        return word_to_syns_dict
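
The method above leans on names defined elsewhere in its class and module; a plausible sketch of those assumptions (the mapping values are guesses consistent with the calls above, not the original definitions):

from nltk.wsd import lesk
from nltk.corpus import wordnet
from thesaurus import Word

# hypothetical mappings from the tagger's labels to thesaurus.com / WordNet POS codes
pos_dict_thesaurus = {'NOUN': 'noun', 'VERB': 'verb', 'ADJ': 'adj', 'ADV': 'adv'}
pos_dict = {'NOUN': wordnet.NOUN, 'VERB': wordnet.VERB, 'ADJ': wordnet.ADJ, 'ADV': wordnet.ADV}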
Example 3
def parallel(inputs):
    (word, word_Dictionary, count, chosen_index) = inputs
    new_instance = Word(word)
    # try relevance levels from strongest (3) to weakest (1) until a match is found
    response = False
    for relevance in (3, 2, 1):
        synonyms = new_instance.synonyms('all', relevance=[relevance], partOfSpeech=part)
        response = findWordInDataset(word_Dictionary, synonyms, word, count)
        if response != False:
            break
    return (chosen_index, response)
Example 4
def cli(word):
    rWord = Word(word)
    synonyms = rWord.synonyms()
    if not synonyms:
        click.echo("No results.")
        return

    for idx, synonym in enumerate(synonyms):
        click.echo("{0}. {1}".format(idx + 1, synonym))
Example 5
def find_synonym(word):
    if word in common:
        return common[word]
    w = Word(word)
    syns = w.synonyms()
    for syn in syns:
        if not has5(syn):
            return syn
    return word.replace('e', '-')
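
`common` and `has5` are not shown; plausible stubs, offered purely as assumptions (the `replace('e', '-')` fallback suggests `has5` tests for 'e', the fifth letter of the alphabet):

common = {}  # assumption: precomputed word -> preferred replacement overrides

def has5(s):
    # assumption: True when s contains 'e', matching the censoring fallback above
    return 'e' in s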
Example 6
def get_synonyms(words_dict):
    word_syns = {}
    for w in tqdm.tqdm(words_dict):
        word = Word(w)
        try:
            syns = word.synonyms(relevance=3)
        except Exception:
            syns = None
        if syns is not None:
            word_syns[w] = syns
    return word_syns
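
A hedged usage sketch with the imports the snippet assumes; any iterable of words works, since only the keys are read:

import tqdm
from thesaurus import Word

print(get_synonyms({'happy': 1, 'angry': 2}))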
Example 7
def _read(filename):
    with open('datsets/categories/%s' % (filename,), 'r') as fp:
        data = fp.read()
        words = [s.strip() for s in data.splitlines()]
        categories[filename] = set(words)
        for word in words:
            if word in ["what", "why", "analyse"]:
                continue
            w = Word(word)
            categories[filename].update(w.synonyms())
        categories[filename] = list(categories[filename])
Example 8
def get_syn_classes(words):
    syns = []
    for word in words:
        try:
            w = Word(word)
            syn = w.synonyms('all')  # one sub-list per sense / part of speech
            for group in syn:
                syns += group
        except Exception:
            pass
    return words + syns
Example 9
def get_syn(words):
    syns = []
    for word in words:
        try:
            w = Word(word)
            syn = w.synonyms('all')
            for group in syn:
                syns += group
        except Exception:
            # print(word, ' not found')
            pass
    return words + syns
Example 10
def generate_synonyms_dict(words_list):
    synonyms_dict = dict()
    for chars in words_list:
        try:
            w = Word(chars)
            syns = w.synonyms(
                relevance=3,
                form='common',
            )
            synonyms_dict[chars] = syns
        except Exception:
            synonyms_dict[chars] = [chars]

    return synonyms_dict
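
A quick hedged check of the fallback behaviour (the words are hypothetical):

from thesaurus import Word

d = generate_synonyms_dict(['happy', 'qqqqzz'])
print(d.get('qqqqzz'))  # unresolvable words map to a one-element list holding the word itself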
Example 11
def new_headline(headline):
    head_list = headline.split()
    new_head = []
    for word in head_list:
        if word in STOP:
            new_head.append(word.upper())
            continue
        w = Word(word)
        syn = w.synonyms()
        if syn:
            new = syn[0]
            new_head.append(new.upper())
        else:
            new_head.append(word.upper())
    new_headline_string = ' '.join(new_head)
    return new_headline_string
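
`STOP` comes from elsewhere; a minimal sketch assuming it is a collection of stopwords to keep verbatim:

from thesaurus import Word

STOP = {'the', 'a', 'an', 'on', 'of'}  # assumption: stopwords are upper-cased but never replaced
print(new_headline('the cat sat on the mat'))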
Example 12
def get_synonyms(word, src="wordnet"):
    synonyms = set()
    if src == "wordnet":
        for ss in wn.synsets(word):
            synonyms |= set(ss.lemma_names())
    elif src == "thesaurus":
        try:
            w = Word(word)
        except Exception:
            return synonyms
        try:
            syn = w.synonyms(relevance=[2, 3])
        except Exception:
            return synonyms
        for s in syn:
            if len(s.split(' ')) == 1:
                synonyms.add(s.lower())
    return synonyms
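
A hedged usage sketch showing both sources, assuming `wn` is NLTK's wordnet corpus reader and `Word` is thesaurus.Word, as the calls suggest:

from nltk.corpus import wordnet as wn
from thesaurus import Word

print(get_synonyms('dog'))                   # WordNet lemma names
print(get_synonyms('dog', src='thesaurus'))  # thesaurus.com, relevance levels 2-3, single words only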
Example 13
def listConcept(word):
    w = Word(word)
    iE = inflect.engine()
    pS = PorterStemmer()
    wordSyns = w.synonyms()
    # start with the word and its synonyms
    myList1 = [word] + list(wordSyns)
    # add plural forms of everything collected so far
    myList1 += [iE.plural(t) for t in list(myList1)]
    # add stems of everything collected so far (plurals included)
    myList1 += [pS.stem(t) for t in list(myList1)]
    # de-duplicate while preserving order
    myList1 = list(dict.fromkeys(myList1))
    return myList1
Example 14
def anotherword(response):  # Some rudimentary synonyms and antonyms.
    choice = input('Enter your choice:\n 1. Synonyms\n 2. Antonyms\n')
    if int(choice) > 2:
        print("Invalid Choice")
        exit()
    word = input("Enter the word: ")
    w = Word(word)
    what = ''
    if '1' in choice:
        temp = w.synonyms()
        what = 'Synonyms'
    elif '2' in choice:
        temp = w.antonyms()
        what = 'Antonyms'
    print('Showing %s of %s' % (what, word))

    for t in temp:
        print(t)
Example 15
def redact_concept(data, concept):
    from thesaurus import Word
    w = Word(concept)
    # lower-cased synonyms of the concept, plus the concept itself
    w1 = [i.lower() for i in w.synonyms()]
    w1.append(concept)
    concept1 = ''
    for i in nltk.word_tokenize(data):
        if i.lower() in w1:
            stats.append([i, len(i), 'Concept'])
            concept1 += '█' * len(i) + ' '
        elif i == '.':
            concept1 += i
        else:
            concept1 += i + ' '
    return concept1
Example 16

def reda_concept(data3, filename, con):
    tokens = nltk.word_tokenize(data3)
    w = Word(con)
    concept = w.synonyms()
    concept.append(con)
    for i in concept:
        for j in range(len(tokens)):
            if i.lower() == tokens[j].lower():
                stats.append(
                    [tokens[j],
                     len(tokens[j]), filename, 'type:Concept'])
                tokens[j] = '█' * len(i)

    reda = ''
    for i in tokens:
        if i in ['.', ',', ':', ';', '"', '?', '!', '(', ')']:
            reda = reda + i
        else:
            reda = reda + i + ' '

    return reda
Example 17
def getSynonyms(word):
    syns = set()
    result = vb.synonym(word)
    if result:
        # vb.synonym returns JSON text (or False); normalize before reading 'text' fields
        if isinstance(result, str):
            result = json.loads(result)
        syns.update([synonym['text'] for synonym in result])
    for syn in wordnet.synsets(word):
        for l in syn.lemmas():
            syns.add(l.name())
    w = Word(word)
    syns.update(w.synonyms())
    if not syns:
        apiKey = os.getenv('BIG_HUGE')
        result = pyhugeconnector.thesaurus_entry(word=word,
                                                 api_key=apiKey,
                                                 pos_tag='n',
                                                 ngram=2,
                                                 relationship_type='syn')
        if isinstance(result, list):
            syns.update(result)
    return syns
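
This one mixes three sources; a hedged sketch of the imports the call names suggest (the exact module paths are assumptions, not verified against the original):

import json
import os
from nltk.corpus import wordnet
from thesaurus import Word
from vocabulary.vocabulary import Vocabulary as vb  # assumption: provides vb.synonym()
import pyhugeconnector  # assumption: Big Huge Thesaurus client exposing thesaurus_entry()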
Example 18
    def process_words(self):
        """
        Sort words, look for duplicates, then get synonyms
        and write to output files. Looking for duplicates
        here because that is an indication that multiple
        sources think the term is appropriate for the
        given grade level.
        """
        # print(self.words)
        # get duplicate words - the words we want to use
        words = self.words
        duplicates = list(set([x for x in words if words.count(x) > 1]))
        duplicates.sort()
        """ try a different way """
        for word in duplicates:
            try:
                w = Word(word)
            except MisspellingError:
                continue
            else:

                # There are 3 relevance levels you can use, 1 will give the set with the most words
                # and possibly some irrelevant words. Here we use 3 to make sure everything stays on topic.
                syns = w.synonyms(relevance=3)

                if syns:
                    for s in syns:
                        self.outputFiles[1].write(word + " " + s + "\n")

                ants = w.antonyms(relevance=3)
                if ants:
                    for a in ants:
                        self.outputFiles[2].write(word + " " + a + "\n")

                if syns or ants:
                    self.outputFiles[0].write(word + " " + self.gradeLevel +
                                              "\n")
Example 19
    def get_synonyms(self, input_word):
        w = Word(input_word)
        print(wordnet.synsets('dog'))   # leftover debug output
        print(wordnet.synsets('test'))  # leftover debug output
        return w.synonyms()
Example 20
# from PyDictionary import PyDictionary
#
# dictionary=PyDictionary()
#
#
# print (dictionary.synonym("Life"))

from nltk.corpus import wordnet

synonyms = []

for syn in wordnet.synsets("goal"):
    for lm in syn.lemmas():
        synonyms.append(lm.name())
print(set(synonyms))

from thesaurus import Word
w = Word('purpose')
print(w.synonyms())
Example 21

'''
28-Get synonyms from Thesaurus
You may need to pip install thesaurus
'''
from thesaurus import Word

USERS_WORD = Word('humbug')
print(USERS_WORD.synonyms(1))
Example 22
def synonyms():
    """QUERY"""
    word = request.args.get("word").lower()

    if word not in wordPath_list:
        wordPath_list.append(word)  # To create word path
    original_word = wordPath_list[0]
    """GET SYNONYMS"""
    # Get result
    try:
        results = Word(word)
    except exceptions.MisspellingError as msplt:
        session.clear()
        # return render_template("errorRedirect.html", error=msplt)
        session['mspltError'] = str(msplt)
        return redirect(f'/related?word={word}')
    except exceptions.WordNotFoundError as wnf:
        session.clear()
        session['wnf'] = str(wnf)
        return redirect(f'/related?word={word}')

    resultData = results.data
    # session['word'] = word # To reuse in "/result"
    # Constructing parameters: part of speech and definitions
    numberOfOptions = len(results)
    partsOfSpeech = [
        item['partOfSpeech'].strip('.') for item in resultData
    ]  # strip '.' because an HTML id like "adj.1" will not work; it must be "adj1"
    definitions = [item['meaning'] for item in resultData]

    # Constructing parameters: synonyms for each part of speech and definitions
    synList = results.synonyms('all')
    flat_synList = [
        item for sublist in synList for item in sublist
    ]  # flattened for reuse in the GET RELATED WORDS step below
    """GET RELATED WORDS"""
    # Result from helpers
    Related_words_data = RelatedWords(word)

    # Filter results: No overlapping with Synonyms, restricted to 10 words, with score rounded
    Related_words_data = [
        item for item in Related_words_data if item['word'] not in flat_synList
    ]  # Avoid overlap with Synonyms
    Related_words_data = Related_words_data[:10]  # trimming to avoid irrelevant results

    # Rounding the 'score' values in the above dict results Related_words_data
    for item in Related_words_data:
        item['score'] = round(item['score'], 2)
    """GET RELATED CONCEPTS"""
    concepts = Relations("_".join(word.split()))  # e.g. "eat    mice  " becomes "eat_mice"

    # Process data: Get the lists of relations id and relations label
    relations_list_id = []
    for item in concepts:
        if item['rel']['@id'] not in relations_list_id:
            relations_list_id.append(item['rel']['@id'])

    relations_list_label = []
    for item in concepts:
        if item['rel']['label'] not in relations_list_label:
            relations_list_label.append(item['rel']['label'])

    # Rounding the 'weight' values to display better in html
    for item in concepts:
        item['weight'] = round(item['weight'], 2)

    # Change the labels to normal (eg. RelatedTo to "is related to") for easier printing out in html
    label_dict = {
        'RelatedTo': 'is related to',
        'ExternalURL': 'can be linked to other sites',
        'FormOf': 'is a form of',
        'IsA': 'is a',
        'PartOf': 'is a part of',
        'HasA': 'has',
        'UsedFor': 'is used for',
        'CapableOf': 'is capable of',
        'AtLocation': 'can be found in',
        'Causes': 'can lead to',
        'HasSubevent': 'is followed by',
        'HasFirstSubevent': 'starts with',
        'HasLastSubevent': 'ends with',
        'HasPrerequisite': 'requires',
        'HasProperty': 'has property or can be described as',
        'MotivatedByGoal': 'in order to',
        'ObstructedBy': 'is prevented by',
        'Desires': 'typically wants',
        'CreatedBy': 'is created by',
        'Synonym': 'has similar meaning with',
        'Antonym': 'has opposite meaning with',
        'DistinctFrom': 'is distinct from',
        'SymbolOf': 'is a symbol of',
        'DefinedAs': 'can be defined or explained as',
        'Entails': 'entails',
        'MannerOf': 'is a way of',
        'LocatedNear': 'can be found near',
        'HasContext': 'is often used in',
        'SimilarTo': 'is similar to',
        'EtymologicallyRelatedTo': 'has common origin with',
        'EtymologicallyDerivedFrom': 'is derived from',
        'CausesDesire': 'makes someone want',
        'MadeOf': 'is made of',
        'ReceivesAction': 'can be',
        'InstanceOf': 'is an example of',
        'NotDesires': 'typically does not want',
        'DerivedFrom': 'is derived from'
    }  # Build a dictionary and use it to look up relation labels

    # Create a new list as a copy of label lists to store real relation list labels, eg 'RelatedTo' to 'is related to'
    real_relations_list_label_names = relations_list_label.copy()
    # populate the list of human-readable relation names
    for i in range(len(relations_list_label)):
        if relations_list_label[i] in label_dict:
            real_relations_list_label_names[i] = label_dict[relations_list_label[i]]

    # List of start node
    start_node_list = []
    for item in concepts:
        if item['start']['label'] not in start_node_list:
            start_node_list.append(item['start']['label'])

    # List for each relation
    concept_network = {}

    for item in relations_list_label:
        concept_network[item] = []  # initiate a list as value for each key/relation

    for item in relations_list_label:
        for edge in concepts:
            if item == edge['rel']['label']:
                obj_to_append = dict(
                    (i, edge[i]) for i in ('start', 'end', 'rel', 'weight')
                )  # only take the important fields
                concept_network[item].append(obj_to_append)

    # Problem: sometimes the word queried is 'start', other times it is 'end'
    word_start_or_end = {}

    for i in range(len(relations_list_label)):
        for j in range(len(concept_network[relations_list_label[i]])):
            if word in concept_network[relations_list_label[i]][j]['end']['label']:
                word_start_or_end[relations_list_label[i]] = 'end'
            else:
                word_start_or_end[relations_list_label[i]] = 'start'

    # Return
    return render_template("results.html", partsOfSpeech=partsOfSpeech, definitions=definitions, synList=synList, numberOfOptions=numberOfOptions, \
        resultData=resultData, Related_words_data=Related_words_data, relations_list_label=relations_list_label, concept_network=concept_network, \
            real_relations_list_label_names=real_relations_list_label_names, word_start_or_end=word_start_or_end, original_word=original_word, word=word, wordPath_list=wordPath_list)
Example 23
    def generate_output_text(self):
        text = self.text
        output_text = text
        #         print(output_text)
        words = text.split()
        counts = {}
        for word in words:
            if word not in counts:
                counts[word] = 0
            counts[word] += 1

        one_word = []
        for key, value in counts.items():
            if value == 1 and key.isalpha() and len(key) > 2:
                one_word.append(key)

        noun = []
        verb = []
        nlp = spacy.load('en_core_web_sm')
        doc = nlp(' '.join(one_word))
        for token in doc:
            if token.pos_ == 'VERB':
                verb.append(token.text)
            if token.pos_ == 'NOUN':
                noun.append(token.text)


#         print('\n verb: \n')
#         print(str(len(verb)))
#         print(verb)

#         print('\n noun: \n')
#         print(str(len(noun)))
#         print(noun)

        all_main = verb + noun
        len_all = len(noun) + len(verb)
        final_value = int(len_all * self.percent / 100)
        random.seed(4)
        temp = random.sample(range(0, len_all), final_value)
        for i in temp:
            try:
                word_str = all_main[i]
                w = Word(word_str)
                a1 = list(w.synonyms())

                # verbs occupy the front of all_main, nouns the back
                pos_tag = 'v' if i < len(verb) else 'n'
                change_word = self.synalter_Noun_Verb(word_str, a1, pos_tag)
                try:
                    search_word = re.search(r'\b(' + word_str + r')\b', output_text)
                    Loc = search_word.start()
                    output_text = (output_text[:Loc] + change_word +
                                   output_text[Loc + len(word_str):])
                except Exception:
                    pass  # word not found in the text; leave it unchanged
            except Exception:
                pass  # thesaurus lookup failed; skip this word

        return output_text
Example 24
            for _, records in groupby(sorted(lst, key=keyprop), keyprop)
        ]


    a = [{'time': '25 APR', 'total': 10, 'high': 10},
         {'time': '26 APR', 'total': 5, 'high': 5}]

    b = [{'time': '24 APR', 'total': 10, 'high': 10},
         {'time': '26 APR', 'total': 15, 'high': 5}]
    merger = merge_list_of_records_by('time', add)
    hasil_merge = merger(a+b)
    print(hasil_merge)

    print("sinonim with thesaurus==================================================================")
    # from PyDictionary import PyDictionary
    #
    # dictionary = PyDictionary()
    # print(dictionary.synonym("good"))

    from thesaurus import Word

    w = Word('suicidal')
    syn = w.synonyms()
    print(syn)

    sn = SenticNet()
    try:
        concept_info_sinonim = sn.concept("suicidal")
        print(concept_info_sinonim)
    except Exception as e:
        print(e)
Example 25
class Synonym(Text):  # class that displays a word's synonyms
    part_of_speech = ("adj", "adv", "contradiction", "conj", "determiner", "interj", "noun",
                      "prefix", "prep", "pron", "verb", "abb", "phrase", "article")

    def __init__(self, master, width, height, word, column, columnspan, row, partspeech):  # class initialisation
        self.base_word = Word(word)
        self.master = master
        self.width = width
        self.height = height
        self.column = column
        self.row = row
        self.columnspan = columnspan
        self.partspeech = partspeech

    def _display(self, content):
        # (re)build the Text widget in its grid cell and show the given string
        Text.__init__(self, self.master, width=self.width, height=self.height)
        self.grid(row=self.row, column=self.column, columnspan=self.columnspan)
        self.config(state=NORMAL)
        self.delete("1.0", END)
        self.insert("1.0", content)
        self.config(state=DISABLED)

    def Search(self):  # look up the word's synonyms
        if not check_internet("http://google.com"):  # check the internet connection
            showerror(message="No Connection", title="Connection Error")
            return
        kwargs = {'allowEmpty': False}
        if self.partspeech != 14:  # index 14 means "all"; anything else selects a specific part of speech
            kwargs['partOfSpeech'] = Synonym.part_of_speech[self.partspeech]
        if FrontApp.condition_synonym != "all":
            kwargs['form'] = FrontApp.condition_synonym
        list_synonym = self.base_word.synonyms('all', **kwargs)
        if list_synonym:  # check whether any results came back
            temp_str = ""
            for k in list_synonym:  # one sub-list per sense
                for j in k:
                    temp_str += j + "\n"
                temp_str += "\n"
            self._display(temp_str)
        else:
            self._display("None")
Example 26
def get_synonyms(word):
    my_word = Word(word)
    return my_word.synonyms()
Example 27
def find_similar_relations_using_thesaurus(word):
    w = Word(word)
    return w.synonyms()
Example 28
    def tf(self, p):
        ca = TrueFalsePopup()
        ca.open()

        # SYSTEM WILL DECIDE RANDOMLY TO PRINT FALSE OR TRUE
        m = random.randint(0, 1)
        print(m)
        global TFAnswer

        base_path = os.path.dirname(os.path.realpath(__file__))
        xml_file = os.path.join(base_path, "definition.xml")

        tree = ET.parse(xml_file)
        root = tree.getroot()

        for Def in root.findall("Def"):
            module = Def.find('tags').text
            if module == p:
                j = Def.find('description').text
        print(j)

        definition = j

        # PRINT QUESTION
        if m == 1:
            ca.ids.L.text = definition
            TFAnswer = 0
        elif m == 0:

            # BREAK THE DEFINITION INTO INDIVIDUAL WORDS
            BreakDefinition = word_tokenize(definition)
            # print(BreakDefinition)

            # LABEL EACH WORD WITH ITS CORRESPONDING PART OF SPEECH TAG
            grammar = nltk.pos_tag(BreakDefinition)
            # print(grammar)

            # CREATE A LIST OF WORDS THAT HAS THE NN TAG
            is_noun = lambda pos: pos[:2] == 'NN'
            list_of_nn = [word for (word, pos) in grammar if is_noun(pos)]
            # shuffle = random.shuffle(list_of_nn)
            # print(list_of_nn)

            # IF THE LIST IS MORE THAN 3, TAKE ONLY 3
            if len(list_of_nn) >= 3:
                choice = random.sample(list_of_nn, 3)
            else:
                choice = list_of_nn
            print(choice)

            # FOR EACH OF THE WORD GET AN ALTERNATIVE WORD
            z = []
            for i in choice:
                print(i)
                thesaurus = Word(i)
                syn = thesaurus.synonyms()
                print(syn)
                if len(syn) < 1:
                    syn = [i]  # no synonyms found: fall back to the original word
                c = random.choice(syn)
                z.append(c)
                print(z)

            # REWRITE THE DEFINITION WITH THE ALTERNATIVE WORD
            a = 0
            for i in choice:
                j = BreakDefinition.index(i)
                BreakDefinition[j] = z[a]
                a = a + 1

            FalseStatement = " ".join(BreakDefinition)
            print(FalseStatement)
            la = FalseStatement
            ca.ids.L.text = la
            TFAnswer = 1
Example 29
    arg_index += 1

# clean query
if parameters.case_folding:
    query = query.lower()
query = re.sub(r'[^ a-zA-Z0-9]', ' ', query)
query = re.sub(r'\s+', ' ', query)
query_words = query.split(' ')

print "before", query_words, "\n"
if parameters.thesaurus:
    synonyms = []
    for term in query_words:
        if term != "":
            word = Word(term)
            synonyms += word.synonyms()
            if term in synonyms:
                synonyms.remove(term)
    query_words += synonyms
print "After", query_words, "\n"
# create accumulators and other data structures
accum = {}
filenames = []
p = porter.PorterStemmer()

# get N
f = open(collection + "_index_N", "r")
N = eval(f.read())
f.close()

# get document lengths/titles
Example 30
# reading csv file
with open(filename, 'r') as csvfile:
    # creating a csv reader object
    csvreader = csv.reader(csvfile)

    # extracting field names through first row

    # extracting each data row one by one
    for row in csvreader:
        row = row[4:]
        for ele in row:
            for x in ele.split():
                if x not in words.keys():
                    words[x] = [x]
                    w = Word(x)
                    syno = w.synonyms()
                    print(syno)
                    if syno is not None:
                        for s in syno:
                            words[x].append(s)

print(words)

with open(newfile, "w") as f:
    for values in words.values():
        st = ''
        if len(values) < 8:
            for ele in values:
                st += ele + ','
        else:
            for ele in values[0:9]:
                st += ele + ','