Beispiel #1
0
def process_all_words():
    letters = "abcdefghijklmnopqrstuvwxyzáàãâäéèêëíìîïóòõôöúùûüçñ".decode(
        encoding='utf-8')
    alphabet = [[l2 + l1 for l1 in letters] for l2 in letters]
    for letter_combination in alphabet:
        for combination in letter_combination:
            # -----------------------------------------------------------------------------------
            result = db_utils.get_nodes_nids_by_regex("%s.+?" % combination,
                                                      label=d.SUBSTANTIVO)
            # -----------------------------------------------------------------------------------
            if result:
                for term in result:
                    try:
                        open('breakpoint.txt', 'w').write(str(combination))
                    except:
                        pass
                    # if term['nid'] == breakpoint_lemma:
                    #     breakpoint = True
                    #     continue
                    if db_utils.verify_node_oneness(term['nid'],
                                                    label=d.SUBSTANTIVO):
                        continue
                    else:
                        print "DUPLICADO:", term['nid']
        break
Beispiel #2
0
def process_all_words():
    lock_inner = False
    # -----------------------------------------------------------------------------
    result = db_utils.get_nodes_nids_by_regex(criteria)
    # -----------------------------------------------------------------------------
    if result:
        # Sort Result Alphabetically ----------------------
        for term in sorted(result, key=lambda k: k['nid']):
            if lock_inner:
                if term['nid'] == word_stop:
                    lock_inner = False
                else:
                    continue
            # --------------------------------------------
            try:
                open(breakpoint + '.txt', 'w').write(str(term['nid']))
            except:
                pass
            # --------------------------------------------
            # process lemma
            single_term = urllib.quote(str(term["nid"]))

            if process_semantic_relationships(single_term) is None:
                print 'unable to get page:', target_page + unidecode(
                    unicode(term['nid']))
                # db_utils.add_label_to_node(term['nid'], d.R_NOT_IN_WEB)
                pass
            else:
                print term['nid'], 'already done'
            continue
    else:
        print "No nodes match the criteria", criteria
Beispiel #3
0
def process_tenses_for_single_verb(verb):
    """Create CONJUGACAO nodes and verb->tense relationships for one verb.

    ``verb`` is passed as a regex to ``get_nodes_nids_by_regex`` with the
    VERBO label; every matching node's VERB_TENSES properties are split
    into individual tense strings and each new tense is linked to the
    verb via an R_VERB_TENSE relationship.
    """
    result = db_utils.get_nodes_nids_by_regex(verb, label=d.VERBO)
    if result:
        for term in result:
            # Only nodes that pass the oneness (no-duplicate) check are
            # processed; duplicates are reported below.
            if db_utils.verify_node_oneness(term['nid'], label=d.VERBO):
                for term_property in term.properties.iteritems():
                    if term_property[0] in d.VERB_TENSES:
                        for tense in term_property[1]:
                            # A single property value may pack several
                            # tense forms separated by ',' or '/'.
                            tenses_collection = regex.split(r"[,\/]", tense)
                            tenses_unique = set()
                            for tense_ in tenses_collection:
                                # '-' marks an empty/placeholder slot.
                                if '-' == tense_:
                                    continue
                                # Split off parenthesised variants:
                                # "a(b)" contributes both "a" and "b".
                                for tense_parentheses in tense_.split('('):
                                    tenses_unique.add(tense_parentheses.replace(')', ''))
                                # NOTE(review): this loop sits inside the
                                # ``tense_`` loop, so entries already in the
                                # growing set are re-visited on every pass.
                                # Harmless only because tenses that already
                                # exist in the DB are skipped just below —
                                # confirm that was the intent.
                                for tense_unique in tenses_unique:
                                    if db_utils.get_node(tense_unique, label=d.CONJUGACAO) is not None:
                                        continue
                                    # Create relationship
                                    verb_n = db_utils.get_node(term['nid'], label=d.VERBO)
                                    tense_n = db_utils.create_tense(tense_unique)
                                    if tense_n is not None and verb_n is not None:
                                        db_utils.create_tense_relationship(term['nid'], tense_unique, d.R_VERB_TENSE)
                                        print '\r', tense_unique,
                                    else:
                                        print "Fodeu, %s ou %s não existem" % (term['nid'], tense_unique)
            else:
                from unidecode import unidecode
                print "Fodeu, " + unidecode(term['nid']) + " tá duplicado"
            print '\r', term['nid'], "completed!"
            print '-' * 100
def process_all_words():
    lock = True
    lock_inner = True
    reverse_alphabet = False
    letters = "abcdefghijklmnopqrstuvwxyzáàãâäéèêëíìîïóòõôöúùûüçñ".decode(encoding='utf-8')
    alphabet = [[l2 + l1 for l1 in letters] for l2 in letters]
    for letter_combination in sorted(alphabet, reverse=reverse_alphabet):
        for combination in letter_combination:
            print "+" * 50
            print combination
            if lock:
                if frag == combination:
                    lock = False
                else:
                    continue
            # -----------------------------------------------------------------------------
            result = db_utils.get_nodes_nids_by_regex("%s.+?" % combination)
            # -----------------------------------------------------------------------------
            if result:
                # Sort Result Alphabetically ----------------------
                for term in sorted(result, key=lambda k: k['nid']):
                    # if word_stop == term["nid"]:
                    #     lock_inner = False
                    #     continue
                    if "'" in term["nid"]:
                        continue
                    if lock_inner:
                        if term['nid'] == word_stop:
                            lock_inner = False
                        else:
                            continue
                    # --------------------------------------------
                    print len(result), term["nid"], "-" * 50
                    try:
                        if reverse_alphabet:
                            open(breakpoint + '_' + frag + '_r.txt', 'w').write(str(combination + ":" + term['nid']))
                        else:
                            open(breakpoint + '_' + frag + '.txt', 'w').write(str(combination + ":" + term['nid']))
                    except:
                        pass
                    # --------------------------------------------
                    # process lemma
                    if process_semantic_relationships(term['nid']) is None:
                        print 'unable to get page:', target_page + unidecode(unicode(term['nid']))
                        db_utils.add_label_to_node(term['nid'], d.R_NOT_IN_WEB)
                        pass
                    else:
                        print term['nid'], 'already done'
                    continue
Beispiel #5
0
def process_tenses():
    """Create tense nodes/relationships for all verbs, by two-letter prefix.

    Same splitting logic as ``process_tenses_for_single_verb``, wrapped in
    a scan over every two-letter alphabet prefix with crude resume support
    via ``breakpoint_lemma`` (module-level, not visible here).
    """
    # Python 2: decode so accented letters are single unicode chars.
    letters = "abcdefghijklmnopqrstuvwxyzáàãâäéèêëíìîïóòõôöúùûüçñ".decode(encoding='utf-8')
    alphabet = [[l2 + l1 for l1 in letters] for l2 in letters]
    for letter_combination in alphabet:
        for combination in letter_combination:
            result = db_utils.get_nodes_nids_by_regex("%s.+?" % combination, label=d.VERBO)
            if result:
                for term in result:
                    # Best-effort checkpoint of the last nid seen.
                    try:
                        open('breakpoint.txt', 'w').write(term['nid'])
                    except:
                        pass
                    # Resume marker: unlock processing once the saved
                    # lemma is reached.
                    if term['nid'] == breakpoint_lemma:
                        breakpoint = True
                        continue

                    # NOTE(review): ``breakpoint`` is assigned inside this
                    # function (line above), which makes it function-local —
                    # if the first term is not ``breakpoint_lemma`` this
                    # read raises UnboundLocalError unless a module global
                    # of the same name exists.  Confirm against the module.
                    if not breakpoint:
                        print term['nid'], 'already done'
                        continue

                    if db_utils.verify_node_oneness(term['nid'], label=d.VERBO):
                        for term_property in term.properties.iteritems():
                            if term_property[0] in d.VERB_TENSES:
                                for tense in term_property[1]:
                                    # One property value may pack several
                                    # tenses separated by ',' or '/'.
                                    tenses_collection = regex.split(r"[,\/]", tense)
                                    tenses_unique = set()
                                    for tense_ in tenses_collection:
                                        # '-' marks an empty slot.
                                        if '-' == tense_:
                                            continue
                                        # "a(b)" contributes "a" and "b".
                                        for tense_parentheses in tense_.split('('):
                                            tenses_unique.add(tense_parentheses.replace(')', ''))
                                        # NOTE(review): iterates the growing
                                        # set inside the ``tense_`` loop, so
                                        # earlier entries are re-visited;
                                        # existing DB tenses are skipped
                                        # below, which masks the repeat.
                                        for tense_unique in tenses_unique:
                                            if db_utils.get_node(tense_unique, label=d.CONJUGACAO) is not None:
                                                continue
                                            # Create relationship
                                            verb_n = db_utils.get_node(term['nid'], label=d.VERBO)
                                            tense_n = db_utils.create_tense(tense_unique)
                                            if tense_n is not None and verb_n is not None:
                                                db_utils.create_tense_relationship(term['nid'], tense_unique, d.R_VERB_TENSE)
                                                print '\r', tense_unique,
                                            else:
                                                print "Fodeu, %s ou %s não existem" % (term['nid'], tense_unique)
                    else:
                        from unidecode import unidecode
                        print "Fodeu, " + unidecode(term['nid']) + " tá duplicado"
                    print '\r', term['nid'], "completed!"
                    print '-' * 100
#!/usr/bin/python
# -*- coding: utf-8 -*-
# One-shot maintenance script: label multi-word terms as expressions.
from generators.utils import db_utils
from generators.utils import defs as d

# Any nid containing a space or hyphen between non-empty parts is a
# multi-word term; tag it with the R_EXPRESSION label.
for node in db_utils.get_nodes_nids_by_regex(".+?[ -].+?"):
    db_utils.add_label_to_node(node.properties["nid"], d.R_EXPRESSION)
Beispiel #7
0
#!/usr/bin/python
# -*- coding: utf-8 -*-
from generators.utils import db_utils
from generators.utils import defs as d

if __name__ == '__main__':
    result = db_utils.get_nodes_nids_by_regex(".+?/.+?", label=d.CONJUGACAO)
    if result:
        for term in result:
            rel_data = db_utils.get_conjugation_root(term['nid']).data()
            for data in rel_data:
                c = data['c']
                r = data['r']
                v = data['v']

                print c['nid']
                if v is None:
                    print "WTF!?", c
                    continue

                tense_unique = term['nid'].split('/')
                for tense in tense_unique:
                    print '\t', tense
                    verb_n = db_utils.get_node(v['nid'], label=d.VERBO)
                    if db_utils.get_node(tense, label=d.CONJUGACAO) is None:
                        tense_n = db_utils.create_tense(tense)
                        if tense_n is not None and verb_n is not None:
                            print '\t\tInserido:', tense
                            db_utils.create_tense_relationship(
                                tense_n['nid'], verb_n['nid'], d.R_VERB_TENSE)
                        else: