Beispiel #1
0
 def CreateRule(self):
     """Interactively build a rule from this match and insert it into the db.

     The user chooses whether the rule is based on the 'd' or the 'h'
     option of the menu. The criterion attributes of both parts of the rule
     are then asked for through setRuleAttributes, the rule is marked as
     accepting or rejecting according to self.rejected, and the result is
     inserted through the database connection.
     """
     # Open the connection to the postprocessing database
     db = SqlaCon(Base, engine)
     basis = multimenu({'d':'tme as dependent','h':'tme as head'},'What will you base the rule on?')
     # self.rejected == 'n' gives an accepting rule ('a'), 'y' a rejecting
     # rule ('r'); any other value leaves the action untouched
     actions = {'n': 'a', 'y': 'r'}
     if basis.answer == 'd':
         rule = RejectDepHead()
         rule.reject_dep_arg = [RejectDepArg()]
         argument = rule.reject_dep_arg[-1]
         # Ask for the criterion of the head part and of the dependent part
         setRuleAttributes(rule, self.head)
         setRuleAttributes(argument, self.dependent)
         if self.rejected in actions:
             argument.action = actions[self.rejected]
         # Store the finished rule
         db.insert(rule)
     elif basis.answer == 'h':
         chosensub = multimenu(self.subdependents,'Which subdependent is the rule associated with?')
         rule = SubDepHead()
         rule.subdeparg = [SubDepArg()]
         argument = rule.subdeparg[-1]
         # Ask for the criterion of the dependent and of the chosen subdependent
         setRuleAttributes(rule, self.dependent)
         setRuleAttributes(argument, self.subdependentobjects[chosensub.answer])
         if self.rejected in actions:
             argument.action = actions[self.rejected]
         # Store the finished rule
         db.insert(rule)
Beispiel #2
0
 def CreateRejectionRule(self):
     """Interactively build a Fidconst rule and insert it into the db.

     The user picks which of the matched words is the launcher; the other
     matched words become collocate candidates, and for each candidate the
     user decides whether it is significant for the rule.
     """
     # Open the connection to the postprocessing database
     db = SqlaCon(Base, engine)
     rule = Fidconst()
     rule.fi_dupl_constfix_collocates = []
     candidates = {}
     # Ask which word launches the rule
     launchermenu = multimenu(self.sentences)
     launchermenu.clearscreen = False
     launchermenu.prompt_valid('Which word is the launcher?')
     # Split the matched words into the launcher and the collocate candidates
     # NOTE(review): launcher stays unbound if no index equals the answer -
     # presumably prompt_valid guarantees a matching index; confirm
     for position, match in enumerate(self.matchlist):
         if str(position) != launchermenu.answer:
             candidates[position] = match.matchedword
             continue
         rule.launcherword = position
         launcher = match.matchedword
     # The launcher part of the rule
     setRuleAttributes(rule, launcher)
     rule.reject = self.rejected
     # The collocate part of the rule: only candidates confirmed by the user
     for position, collocate in candidates.items():
         significance = yesnomenu()
         significance.prompt_valid('Is this collocate significant in constructing the rule?: <{}>'.format(collocate.token))
         if significance.answer != 'y':
             continue
         rule.fi_dupl_constfix_collocates.append(Fidcoll())
         collocaterule = rule.fi_dupl_constfix_collocates[-1]
         setRuleAttributes(collocaterule, collocate)
         collocaterule.collocate_id = position
     # Store the finished rule
     db.insert(rule)
Beispiel #3
0
def setRuleAttributes(rule, word):
    """Let the user pick the attribute that serves as the rule's criterion.

    The attributes of ``word`` are printed, the user selects one of
    token / lemma / feat / pos, and the selection is stored on ``rule`` as
    ``criterionattr`` together with the word's value for that attribute as
    ``criterionval``.
    """
    # Show what the word looks like before asking
    word.printAttributes()
    attributes = {'0':'token', '1':'lemma', '2':'feat', '3':'pos'}
    attrmenu = multimenu(attributes)
    attrmenu.clearscreen = False
    attrmenu.prompt_valid('Which attribute is the criterion?')
    chosen = attrmenu.validanswers[attrmenu.answer]
    rule.criterionattr = chosen
    # The criterion value is the word's own value of the chosen attribute
    rule.criterionval = getattr(word, rule.criterionattr)
Beispiel #4
0
def AnalyzeInformationStructure(dictrow):
    """Ask the questions needed to annotate a sentence's information structure.

    The sentence in ``dictrow['sent']`` is printed and the user answers
    three menus: topic vs. focus (or removal), the subtype of the chosen
    function, and the status of the subject.

    Returns False when the user chooses the 'd' option; otherwise returns
    the same ``dictrow`` with the keys 'dfunct' and 'subfunct' filled in.
    """
    print('\n' + textwrap.fill(dictrow['sent']) + '\n')

    mainchoices = {
        't': 'topiikki',
        'f': 'fokus',
        'd': 'Poista tämä analysoitavista'
    }
    tf = multimenu(mainchoices, 'Ennemmin topiikki vai fokus?')

    # 'd' drops the sentence from the analysis altogether
    if tf.answer == 'd':
        return False

    # The available subtypes depend on the first answer; anything other
    # than 't' gets the focus subtypes
    if tf.answer == 't':
        subtypes = {
            'a': 'a-topiikki',
            's': 's-topiikki',
            'as': 'additiivinen s-topiikki',
            'vs': 'varsinainen s-topiikki',
            'ki': 'implisiittinen k-topiikki',
            'ke': 'eksplisittinen k-topiikki'
        }
    else:
        subtypes = {
            's': 'semanttinen fokus',
            'kk': 'Korjaava kontrastinen fokus',
            'tk': 'Toteava kontrastinen fokus'
        }
    dftype = multimenu(subtypes, 'Valitse alalaji')

    # A separate question about the subject
    subjectchoices = {
        'a': 'aktivoitu',
        'sa': 'semiaktiivinen',
        'n': 'aktivoimaton'
    }
    subf = multimenu(subjectchoices, 'Millainen D-status subjektilla?')

    dictrow['dfunct'] = "_".join([tf.answer, dftype.answer])
    dictrow['subfunct'] = subf.answer
    return dictrow
Beispiel #5
0
 def __init__(self):
     """Set up the menu and the initial selection and program-flow state."""
     self.menu = multimenu(MainMenu.mainanswers)
     # Nothing has been selected or searched yet
     self.selectedlang, self.selecteddb = 'none', 'none'
     self.isparallel = 'no'
     self.searchcommitted = False
     self.columns = {}
     self.conditionset = None
     # Flags that control the program flow
     self.run, self.pause = True, False
Beispiel #6
0
 def select(self):
     """Ask whether to reject this match and evaluate the decision.

     Returns False when the user chooses to quit; otherwise stores the
     answer in self.rejected, calls self.evalueatesel() and returns True.
     """
     self.match.BuildSentencePrintString()
     padding = '\n'*2
     print('{0}{1}{0}'.format(padding, self.hlheadsentence))
     decision = multimenu({'y' : 'yes, REJECT this!', 'n' : 'no, ACCEPT this', 'q' : 'quit'})
     decision.clearscreen = False
     decision.prompt_valid('Should I REJECT this match?')
     # Quitting stops the processing without evaluating anything
     if decision.answer == 'q':
         return False
     self.rejected = decision.answer
     self.evalueatesel()
     return True
Beispiel #7
0
 def AddConditions(self, headcond=False):
     """Interactively collect search conditions, one column at a time.

     For every chosen column the values are gathered with the column's
     PickSearchValue method. A regex condition is stored under the key
     '#<column name>', negative conditions under '!<column name>' and
     ordinary conditions under the plain column name. The collected dict
     is saved to self.headcols when ``headcond`` is true, otherwise to
     self.condcols.
     """
     conditions = {}
     columnmenu = multimenu(self.columnnames)
     continuemenu = multimenu({
         'y': 'add more',
         'q': 'stop adding conditions'
     })
     # Start with 'y' so that at least one condition is asked for
     continuemenu.answer = 'y'
     while continuemenu.answer == 'y':
         columnmenu.prompt_valid(
             self.optionstring +
             'What column should the condition be based on?')
         column = self.columns[int(columnmenu.answer)]
         values = []
         # The column itself decides when enough values have been given
         while column.addmorevalues:
             values.append(column.PickSearchValue())
         if column.regexcond:
             # Only the last picked value is used as the regex
             pattern = values[-1]
             conditions["#" + column.name] = pattern
             summary = 'REGEX: ' + pattern
         elif column.negativeconds:
             conditions["!" + column.name] = tuple(values)
             summary = 'NOT EQUAL TO: ' + ' OR '.join(values)
         else:
             conditions[column.name] = tuple(values)
             summary = ' OR '.join(values)
         self.FormatOptionString([column.screenname, summary])
         continuemenu.prompt_valid(self.optionstring +
                                   'Keep adding search conditions?')
     setattr(self, 'headcols' if headcond else 'condcols', conditions)
Beispiel #8
0
def FilterNonTemporal(thisSearch):
    """Process matches and reject the ones that by your interpretation are not temporal.

    The function works in two passes. The first pass goes through all
    matches that are not yet postprocessed and keeps the ones that no
    existing rule covers. The second pass shows each remaining match to
    the user, who decides about it and may create a new rule; existing
    rules are checked again first, because rules created earlier in the
    same session may already cover a later match.

    Returns the string 'Remember to save, if necessary' if the user quits
    in the middle of the processing, otherwise None.
    """
    logging.info('Filtering NON-TEMPORAL: ' + '*'*150)
    #Connect to databases
    con = SqlaCon(Base, engine)
    con.LoadSession()
    isRussian = thisSearch.queried_table == 'ru_conll'
    # Count how much is to be processed and exclude those that already are
    print('Counting and applying rules')
    matchestoprocess = list()
    segmentcount = len(thisSearch.matches)
    for i, matches in enumerate(thisSearch.matches.values()):
        print('{}/{}'.format(i, segmentcount), end='\r')
        for match in matches:
            if match.postprocessed:
                continue
            #First, check if there is a rule concerning this match
            thismatch = PotetialNontemporal(match, isRussian)
            #Build a clause object: if the clause has no finite verb, do not apply rules
            thismatch.matchedclause = Clause(match.matchedsentence, match.matchedword)
            if not thismatch.CheckExistingRules(con):
                matchestoprocess.append(match)
    #Start the actual processing:
    for processed, match in enumerate(matchestoprocess, start=1):
        # Pass the language flag here as well, so that both passes build
        # the match object in the same way
        thismatch = PotetialNontemporal(match, isRussian)
        #Build a clause object: if the clause has no finite verb, do not apply rules
        thismatch.matchedclause = Clause(match.matchedsentence, match.matchedword)
        if thismatch.CheckExistingRules(con):
            # A rule already takes care of this match
            continue
        #Clear the output for convenience
        os.system('cls' if os.name == 'nt' else 'clear')
        print('Processing match no {}/{}'.format(processed,len(matchestoprocess)))
        if not thismatch.select():
            # The user chose to quit
            return 'Remember to save, if necessary'
        #After the decision, ask about creating a rule:
        createrule = multimenu({'y':'yes','n':'no','x':'Create rule with dependents feat and heads (verbs) lemma'},'Create a rule?')
        if createrule.answer =='y':
            thismatch.CreateRule()
        elif createrule.answer =='x':
            thismatch.CreateQuickRule()
Beispiel #9
0
 def select(self):
     """Ask which duplicate(s) to reject and evaluate the decision.

     Returns False when the user chooses to quit, otherwise True. Every
     evaluated value is stored in self.rejected before self.evalueatesel()
     is called.
     """
     # The fixed options come first, followed by the sentences themselves
     options = {'n': 'none', 'q': 'quit'}
     options.update(self.sentences)
     rejectmenu = multimenu(options)
     rejectmenu.clearscreen = False
     rejectmenu.prompt('Which one(s) will you reject? (if rejecting many, type all the indexes in one string)')
     choice = rejectmenu.answer
     if choice == 'q':
         return False
     # With more than two sentences every character of the answer is
     # evaluated on its own before the answer as a whole
     # NOTE(review): the whole answer string is evaluated again after the
     # individual indexes - confirm that this is intended
     pending = list(choice) if len(self.sentences) > 2 else []
     pending.append(choice)
     for rejected in pending:
         self.rejected = rejected
         self.evalueatesel()
     return True
Beispiel #10
0
    def PickSearchValue(self):
        """Select a value to be used in a search.

        Without preset values the user types the value freely:

        - 'l' loads the values with LoadCsv() and returns its result
        - a value surrounded by forward slashes is returned as a regex
          (slashes stripped) and sets self.regexcond
        - a value starting with '!' sets self.negativeconds and is
          returned without the '!'
        - anything else, including an empty input, is returned as it is

        With preset values the user picks one of them from a menu.

        Except for the 'l' and regex cases, which stop the value
        collection right away, the user is finally asked whether to keep
        adding values and self.addmorevalues is set accordingly.
        """
        if not self.presetvalues:
            os.system('cls' if os.name == 'nt' else 'clear')
            headerstrings = [
                "Give a value the column {} should have.".format(
                    get_color_string(bcolors.BLUE, self.screenname)),
                "- If you want to use a regex, surround the string with forward slashes (e.g. /^[m|M]yregexstri.*/)",
                "- If this  is  a negative condition, begin the first condition with a ! (e.g. !dontmatchthis)",
                "- Press l to load a list of values from an external file\n>"
            ]
            value = input('\n'.join(headerstrings))
            if value == 'l':
                self.addmorevalues = False
                return LoadCsv()
            # startswith/endswith are safe on an empty input, unlike
            # indexing with value[0]
            if value.startswith('/') and value.endswith('/'):
                self.addmorevalues = False
                self.regexcond = True
                return value.strip('/')
            if value.startswith('!'):
                self.negativeconds = True
                returnvalue = value[1:]
            else:
                # Only the first negative condition carries the '!', so
                # later values must be kept intact even when
                # self.negativeconds is already set
                returnvalue = value
        else:
            valuemenu = multimenu(
                self.presetvalues,
                'Pick a value the column {} should have '.format(
                    get_color_string(bcolors.BLUE, self.screenname)))
            returnvalue = valuemenu.validanswers[valuemenu.answer]

        self.askmoremenu.prompt_valid()
        self.addmorevalues = self.askmoremenu.answer == 'y'

        return returnvalue
Beispiel #11
0
def CheckAvailability(songname, allnames):
    """Check if this song is in the db and try to guess if not.

    The name is lowercased and returned as such when it is found in
    ``allnames``. Otherwise the (at most) ten names most similar to it are
    offered in a menu and the name picked by the user is returned.

    Raises SystemExit if the user accepts none of the suggestions.
    """
    songname = songname.lower()
    if songname in allnames:
        return songname

    # Rank the distinct names by similarity. Sorting the names themselves
    # (rather than keying a dict by the ratio) keeps names that happen to
    # share the same ratio from overwriting each other.
    candidates = sorted(
        dict.fromkeys(allnames),
        key=lambda name: SequenceMatcher(None, songname, name).ratio(),
        reverse=True)
    suglist = {str(idx): name for idx, name in enumerate(candidates[:10])}
    suglist['n'] = 'ei mikään näistä'
    fuzzymenu = menus.multimenu(suglist, promptnow = 'Vastaako jokin näistä haettavaa laulua ({})?'.format(songname))
    if fuzzymenu.answer == 'n':
        sys.exit('Song "{}" not found. Exiting.'.format(songname))
    return suglist[fuzzymenu.answer]
Beispiel #12
0
    def __init__(self, name, con):
        """Set up the state of a searchable column.

        ``name`` is the name of the column and ``con`` a connection object
        whose FetchQuery method is used for collecting the preset values
        of the column, when the column is listed in
        ConllColumn.presetvalues.
        """
        self.name = name
        self.presetvalues = {}
        self.regexcond, self.negativeconds = False, False
        # Flag maintained by PickSearchValue; it only gets its initial
        # value here
        self.addmorevalues = True

        # Show a more user-friendly name if one is defined; otherwise
        # capitalise the first letter of the column name
        try:
            screenname = ConllColumn.descriptivenames[name]
        except KeyError:
            screenname = name[0].upper() + name[1:]
        self.screenname = screenname

        # Some columns are not freely typed: their values are chosen from
        # the values found in the database, the most frequent first
        # NOTE(review): the column and table names are interpolated into
        # the SQL string - presumably they never come from user input; verify
        if name in ConllColumn.presetvalues:
            query = 'SELECT {colname}, count({colname}) FROM {table} group by 1 order by 2 DESC'.format(
                colname=self.name, table=Db.searched_table)
            self.presetvalues.update(
                (str(idx), row[0])
                for idx, row in enumerate(con.FetchQuery(query)))

        # The menu for asking whether more values should be added
        askmore = multimenu({'y': 'yes', 'n': 'no'})
        askmore.question = 'Keep adding possible search values for this column?'
        self.askmoremenu = askmore
Beispiel #13
0
def printResults(thisSearch):
    """Print the matches of a search and optionally save them as csv.

    The user is asked how many matches to print and whether to randomize
    them. In randomized mode at most one match per segment (one key of
    ``thisSearch.matches``) is printed. For a parallel search the source
    and target contexts are drawn as a table, otherwise each concordance
    is printed on its own.

    If nothing was found, a notice and the subquery with its values are
    printed instead. Returns None.
    """
    if len(thisSearch.matches) == 0:
        print('Sorry, nothing found.')
        print(thisSearch.subquery)
        print(thisSearch.subqueryvalues)
        return

    total = thisSearch.absolutematchcount
    printcount = input(
        'Found {} occurences. How many should I print? (press enter to print all)\n'
        .format(total))
    printcount = total if printcount == '' else int(printcount)
    while printcount > total:
        printcount = int(
            input('Please give a number smaller than {}.'.format(total + 1)))
    ordermenu = multimenu({
        'r': 'randomize',
        'n': 'Do not randomize'
    }, 'Should I randomize the order?')
    if ordermenu.answer == 'r':
        # Segments are sampled, not single matches, so the sample can never
        # be larger than the number of segments even if more was asked for
        segmentkeys = list(thisSearch.matches)
        samplesize = max(0, min(printcount, len(segmentkeys)))
        #randomly select 1 of the matches in each sampled segment
        printmatches = [
            random.choice(thisSearch.matches[rkey])
            for rkey in random.sample(segmentkeys, samplesize)
        ]
    else:
        printmatches = list()
        for matches in thisSearch.matches.values():
            # Stop going through the segments as soon as enough is collected
            if len(printmatches) >= printcount:
                break
            for match in matches:
                if len(printmatches) >= printcount:
                    break
                printmatches.append(match)
    #actual printing
    #========================================
    rows = list()
    table = Texttable()
    #Initialize table printer
    table.set_cols_align(["l", "l"])
    table.set_cols_valign(["m", "m"])

    if thisSearch.isparallel:
        headerrow = ['sl', 'tl', 'source']
    else:
        headerrow = ['concordance', 'source']
    csvrows = [headerrow]

    for idx, match in enumerate(printmatches):
        match.BuildSlContext()
        if thisSearch.isparallel:
            match.BuildTlContext()
            rows.append([
                'Source text id: {}, Sentence id: {}, align id: {}\n'.
                format(match.matchedword.sourcetextid,
                       match.matchedsentence.sentence_id, match.align_id),
                ''
            ])
            rows.append([
                get_color_string(bcolors.BLUE, match.slcontextstring),
                get_color_string(bcolors.RED, match.tlcontextstring)
            ])
            csvrows.append([
                match.slcontextstring, match.tlcontextstring,
                match.matchedword.sourcetextid
            ])
        else:
            # Very long contexts are replaced by the sentence alone on the
            # screen; the csv still gets the whole context
            if len(match.slcontextstring) > 3000:
                printedstring = match.matchedsentence.printstring + "(only sentence printed)"
            else:
                printedstring = match.slcontextstring
            print(
                '{}:\n=======================\n{}\n----------------------\n[Sentence id: {}, align id: {}, text_id: {}]\n\n\n'
                .format(idx, textwrap.fill(printedstring),
                        match.matchedsentence.sentence_id, match.align_id,
                        match.matchedword.sourcetextid))
            csvrows.append(
                [match.slcontextstring, match.matchedword.sourcetextid])
    if thisSearch.isparallel:
        table.add_rows(rows)
        print(table.draw() + "\n")
    #========================================
    csvmenu = multimenu({'y': 'yes', 'n': 'no'}, 'Save csv?', False)
    if csvmenu.answer == 'y':
        fname = input('Give the name of the csv:\n>')
        with open(fname, "w", newline='') as f:
            writer = csv.writer(f)
            try:
                writer.writerows(csvrows)
            except TypeError:
                # NOTE(review): this drops into a debugger when a row
                # cannot be written - looks like a development aid; confirm
                # whether it should stay
                import ipdb
                ipdb.set_trace()
Beispiel #14
0
 def WordCounts(self):
     """Count words in the texts and print the counts as a table.

     For every text returned by self.CollectTexts() the word count of the
     text and of its translation is fetched with self.wordCountForText().
     The result is printed as an aligned table and, if the user wants to,
     written to 'wordcounts.csv'.
     """
     #This needs to be improved:
     tables = {'source': 'fi_conll', 'target': 'ru_conll'}
     #########
     headers = ('Id', 'Title', 'Word count', 'Translation title',
                'translation wordcount')
     rowformat = '{0:3} | {1:{titlewidth}} | {2:{wcwidth}} | {3:{trtitlewidth}} | {4:{trwcwidth}}'
     print('Analyzing...')
     #Fetch wordcount for each text and target text
     texts = list()
     for res in self.CollectTexts():
         texts.append({
             'id': res['id'],
             'title': res['title'],
             'wordcount': self.wordCountForText(res['id'], tables['source']),
             'translation title': res['transtitle'],
             'trwordcount': self.wordCountForText(res['id'], tables['target'])
         })

     def columnwidth(header, key):
         """Width of the widest cell of a column, the header included."""
         return max([len(header)] + [len(text[key]) for text in texts])

     # Every width is based on the header that is actually printed, so
     # that the header row lines up with the data rows
     widths = {
         'titlewidth': columnwidth(headers[1], 'title'),
         'wcwidth': columnwidth(headers[2], 'wordcount'),
         'trtitlewidth': columnwidth(headers[3], 'translation title'),
         'trwcwidth': columnwidth(headers[4], 'trwordcount'),
     }
     #Print the output table
     print(rowformat.format(*headers, **widths))
     for text in texts:
         print(
             rowformat.format(text['id'], text['title'], text['wordcount'],
                              text['translation title'],
                              text['trwordcount'], **widths))
     #Print csv if the user wants to:
     csvmenu = multimenu({'y': 'yes', 'n': 'no'}, 'Write csv?')
     if csvmenu.answer == 'y':
         # newline='' lets the csv module take care of the line endings
         with open('wordcounts.csv', 'w', newline='') as f:
             fieldnames = [
                 'id', 'title', 'wordcount', 'translation title',
                 'trwordcount'
             ]
             writer = csv.DictWriter(f, fieldnames=fieldnames)
             writer.writeheader()
             writer.writerows(texts)
Beispiel #15
0
 def __init__(self):
     """Create the menu of the available functions together with its question."""
     functionmenu = multimenu(Statmenu.menuoptions)
     functionmenu.question = 'Select the function you would like to apply:'
     self.menu = functionmenu