Exemple #1
0
def main(query):
    """Run the string matcher selected on the command line.

    ``sys.argv[1]`` is parsed as an integer selector: 1 calls ``kmp.kmp``,
    2 calls ``bm.bm`` and 3 calls ``regex.regex``, each with ``sys.argv[2]``
    as the only argument.  Any other selector does nothing.

    ``query`` is accepted but not used by this function.
    """
    selector = int(sys.argv[1])
    if selector == 1:
        kmp.kmp(sys.argv[2])
        return
    if selector == 2:
        bm.bm(sys.argv[2])
        return
    if selector == 3:
        regex.regex(sys.argv[2])
Exemple #2
0
 def __init__(self):
     """Create the helper objects and reset all learned state.

     Every attribute is initialised exactly once: ``re`` and ``map`` hold
     freshly built ``regex()`` / ``my_map()`` helpers, everything else
     starts out as ``None``.
     """
     self.re = regex()
     self.map = my_map()
     self.clf = None
     self.strong_learner = None
     self.vocab = None
     self.max_length = None
Exemple #3
0
 def __init__(self, run=True):
     """Create the helper objects, reset learned state and optionally run.

     Every attribute is initialised exactly once: ``re``, ``map`` and
     ``spliter`` hold freshly built helpers, everything else starts out
     as ``None``.

     Args:
         run: When true (the default), ``self.run()`` is called once the
             attributes are in place.
     """
     self.re = regex()
     self.map = my_map()
     self.clf = None
     self.strong_learner = None
     self.vocab = None
     self.max_length = None
     self.spliter = SentenceSpliter()
     if run:
         self.run()
Exemple #4
0
 def __init__(self):
     """Create the helper objects, reset learned state and run the tokenizer.

     Every attribute is initialised exactly once: ``re``, ``map`` and
     ``spliter`` hold freshly built helpers, everything else starts out
     as ``None``.  ``Tokenizer.run`` is then invoked on this instance.
     """
     self.re = regex()
     self.map = my_map()
     self.clf = None
     self.strong_learner = None
     self.vocab = None
     self.max_length = None
     self.spliter = SentenceSpliter()
     Tokenizer.run(self)
Exemple #5
0
def extractContent(content, settings):
    """
    Extract the desired content from the supplied raw text from a file.

    Exactly one extraction mode is applied, chosen in this order: the 're'
    setting, then the 'line' setting, then the 'start'/'end' range settings.
    The result is always passed through prepareContent.

    Inputs:
        content: The raw text to extract from.
        settings[dict]: The setting from the createToken method.

    Returns:
        A (content, line) tuple: the extracted text and the index of the
        first line of the raw text that contains the first line of the
        extracted text.  When no raw line contains it, line is 1.
    """
    raw = copy.copy(content) # complete copy of original
    if settings['re'] is not None:
        # NOTE(review): eval() executes whatever string is stored under
        # 're-flags'; this is only safe while settings come from trusted input.
        content = regex(settings['re'], content, eval(settings['re-flags']))

    elif settings['line'] is not None:
        content = extractLine(content, settings["line"])

    elif (settings['start'] is not None) or (settings['end'] is not None):
        content = extractLineRange(content,
                                   settings['start'],
                                   settings['end'],
                                   settings['include-start'],
                                   settings['include-end'])

    content = prepareContent(content, settings)

    # Locate the line
    # NOTE(review): the fallback value is 1 while a located index comes from
    # enumerate() and is 0-based -- confirm which convention callers expect.
    line = 1
    match = re.search(r'(.*?)$', content, flags=re.MULTILINE)
    if match is not None:
        first = match.group(1)
        for i, raw_line in enumerate(raw.splitlines()):
            if first in raw_line:
                line = i
                # Stop at the first hit; scanning on would report the last
                # raw line containing the text instead of the first.
                break
    return content, line
Exemple #6
0
 def __init__(self, uploaded_files, text, option):
     """Search ``uploaded_files`` for ``text`` and store the outcome in ``self.hasil``.

     Args:
         uploaded_files: Passed to the matcher's ``convertText`` (KMP and
             Boyer-Moore) or to the ``regex.regex`` constructor.
         text: The text to look for.  It is lower-cased for the KMP and
             Boyer-Moore matchers and passed unchanged to the regex matcher.
         option: ``"pilihan1"`` selects KMP, ``"pilihan2"`` selects
             Boyer-Moore, anything else selects the regex matcher.

     For KMP and Boyer-Moore ``self.hasil`` is a one-row table whose middle
     cell is a message; for the regex matcher it is whatever
     ``regexMatch`` returns.
     """
     if option == "pilihan1":
         matcher = kmp.kmp()
         matcher.convertText(uploaded_files)
         # Run the search once and reuse the index for test and message.
         index = matcher.kmpMatch(text.lower())
     elif option == "pilihan2":
         matcher = boyce.boyce()
         matcher.convertText(uploaded_files)
         index = matcher.bmMatch(text.lower())
     else:
         self.hasil = regex.regex(uploaded_files).regexMatch(text)
         return

     # Both exact matchers report "not found" as -1.
     if index == -1:
         message = "Tidak ditemukan " + text + " pada file."
     else:
         message = "indeks pada file: " + str(index)
     self.hasil = [['', message, '']]
def extractContent(content, settings):
    """
    Extract the desired content from the supplied raw text from a file.

    Exactly one extraction mode is applied, chosen in this order: the 're'
    setting, then the 'line' setting, then the 'start'/'end' range settings.
    The result is always passed through prepareContent.

    Inputs:
        content: The raw text to extract from.
        settings[dict]: The setting from the createToken method.

    Returns:
        A (content, line) tuple: the extracted text and the index of the
        first line of the raw text that contains the first line of the
        extracted text.  When no raw line contains it, line is 1.
    """
    raw = copy.copy(content)  # complete copy of original
    if settings['re'] is not None:
        # NOTE(review): eval() executes whatever string is stored under
        # 're-flags'; this is only safe while settings come from trusted input.
        content = regex(settings['re'], content, eval(settings['re-flags']))

    elif settings['line'] is not None:
        content = extractLine(content, settings["line"])

    elif (settings['start'] is not None) or (settings['end'] is not None):
        content = extractLineRange(content, settings['start'], settings['end'],
                                   settings['include-start'],
                                   settings['include-end'])

    content = prepareContent(content, settings)

    # Locate the line
    # NOTE(review): the fallback value is 1 while a located index comes from
    # enumerate() and is 0-based -- confirm which convention callers expect.
    line = 1
    match = re.search(r'(.*?)$', content, flags=re.MULTILINE)
    if match is not None:
        first = match.group(1)
        for i, raw_line in enumerate(raw.splitlines()):
            if first in raw_line:
                line = i
                # Stop at the first hit; scanning on would report the last
                # raw line containing the text instead of the first.
                break
    return content, line
Exemple #8
0
def formatPcb(sub_pcb,tmpDir,copies):

    bltup = tuple([0,0,0,0])
   
    for cop in range(copies):
        let = chr(ord('A') + cop)

        reg_conn = re.compile(r"(.*)Connect\(\"(\D+)(\d+)")
        subs_conn = r'\1Connect("\2_%sx\3' % let
        
        reg_ele = re.compile(regex('EleRef',bltup))
        subs_ele = r'Element[\1 \2 "\3_%sx\4" \5' % let

        reg_Net = re.compile(r'Net\("(\w+)" "(.*)"\)')
        subs_Net = r'Net("\1_%sx" "\2")' % let
        reg_Netpgnd = re.compile(r'Net\("PGND"')

        file = "%s%s%s" % (tmpDir,sub_pcb,cop)
        print file

        for line in fileinput.input([file],inplace = True):
            mat_conn = reg_conn.findall(line)
            mat_ele = reg_ele.findall(line)
            mat_Net = reg_Net.findall(line)
            mat_Netpgnd = reg_Netpgnd.findall(line)
            if mat_conn:
                line = reg_conn.sub(subs_conn,line)
            elif mat_ele:
                line = reg_ele.sub(subs_ele,line)
            elif mat_Net:
                if not(mat_Netpgnd):
                    line = reg_Net.sub(subs_Net,line)
            print line,
Exemple #9
0
def removeBox_pcb(file,pts):
    l1 = regex('l1',pts)
    l2 = regex('l2',pts)
    l3 = regex('l3',pts)
    l4 = regex('l4',pts)

    for line in fileinput.input([file],inplace=True):
        if (line == "%s%s" %(l1,"\n")):
            a =1
        elif (line == "%s%s" % (l2,"\n")):
            a=1
        elif (line == "%s%s" % (l3,"\n")):
            a=1
        elif (line == "%s%s" % (l4,"\n")):
            a=1
        else:
            print line
Exemple #10
0
def run_M_m(V):
    """Turn a finished pending command into a ``:%s/`` command and send it.

    Nothing happens until the pending command ends with ``CR``.  Then its
    first and last characters are stripped, the remainder is fed to a
    ``regex.regex(":%s/")`` builder, the built command is passed to
    ``V.input`` and the pending command is cleared.
    """
    if V.pending_command[-1:] != CR:
        return

    V.pending_command = V.pending_command[1:-1]
    builder = regex.regex(":%s/")
    builder.source(V.pending_command)

    V.input(builder.get_final())
    V.pending_command = ""
Exemple #11
0
def run_M_m(V):
    """Turn a finished pending command into a ``:%s/`` command and send it.

    Nothing happens until the pending command ends with ``CR``.  Then its
    first and last characters are stripped, the remainder is fed to a
    ``regex.regex(":%s/")`` builder, the built command is passed to
    ``V.input`` and the pending command is cleared.
    """
    if V.pending_command[-1:] != CR:
        return

    V.pending_command = V.pending_command[1:-1]
    builder = regex.regex(":%s/")
    builder.source(V.pending_command)

    V.input(builder.get_final())
    V.pending_command = ""
Exemple #12
0
def getRefdegs(sub_sch,dir):
    """Collect the reference designators found in copy 0 of a schematic.

    The file ``dir + sub_sch + "0"`` is rewritten in place: every match of
    the ``regex('ref', ...)`` pattern is replaced with the
    ``regex('sref', ...)`` substitution.  For each line that matches, the
    first two groups of its first match are recorded.

    Returns:
        A list of ``(pre, post)`` tuples, one per matching line, in file
        order.
    """
    placeholder = (0, 0, 0, 0)
    path = "%s%s0" % (dir, sub_sch)
    ref_re = re.compile(regex('ref', placeholder))
    replacement = regex('sref', placeholder)
    found = []

    # With inplace=True, stdout is redirected into the file being read.
    for line in fileinput.input([path], inplace=True):
        hits = ref_re.findall(line)
        if hits:
            first_hit = hits[0]
            found.append((first_hit[0], first_hit[1]))
        sys.stdout.write(ref_re.sub(replacement, line))

    return found
Exemple #13
0
def run_M_s(V):
    """Turn a finished pending command into a ``:s/`` command and send it.

    Nothing happens until the pending command ends with ``CR``.  Then its
    first and last characters are stripped, the remainder is split on "/"
    and the resulting list is fed to a ``regex.regex(":s/")`` builder.  The
    built command is passed to ``V.input`` and the pending command is
    cleared.
    """
    if V.pending_command[-1:] != CR:
        return

    V.pending_command = V.pending_command[1:-1]
    builder = regex.regex(":s/")
    builder.source(V.pending_command.split("/"))

    V.input(builder.get_final())
    V.pending_command = ""
Exemple #14
0
def run_M_S(V):
    """Turn a finished pending command into a ``:s/`` command with the 'g' flag.

    Nothing happens until the pending command ends with ``CR``.  Then its
    first and last characters are stripped, the remainder is fed to a
    ``regex.regex(":s/")`` builder and the 'g' flag is added.  The built
    command is passed to ``V.input`` and the pending command is cleared.
    """
    if V.pending_command[-1:] != CR:
        return

    V.pending_command = V.pending_command[1:-1]
    builder = regex.regex(":s/")
    builder.source(V.pending_command)
    builder.add_flag('g')

    V.input(builder.get_final())
    V.pending_command = ""
Exemple #15
0
def run_M_S(V):
    """Turn a finished pending command into a ``:s/`` command with the 'g' flag.

    Nothing happens until the pending command ends with ``CR``.  Then its
    first and last characters are stripped, the remainder is fed to a
    ``regex.regex(":s/")`` builder and the 'g' flag is added.  The built
    command is passed to ``V.input`` and the pending command is cleared.
    """
    if V.pending_command[-1:] != CR:
        return

    V.pending_command = V.pending_command[1:-1]
    builder = regex.regex(":s/")
    builder.source(V.pending_command)
    builder.add_flag('g')

    V.input(builder.get_final())
    V.pending_command = ""
Exemple #16
0
def drawBox_pcb(file,pts):
    l1 = regex('l1',pts)
    l2 = regex('l2',pts)
    l3 = regex('l3',pts)
    l4 = regex('l4',pts)

    reg_dr = re.compile(r"Layer\(1 ")
    prtNext = False
    for line in fileinput.input([file],inplace=True):
        mat = reg_dr.findall(line)
        if prtNext == True:
            prtNext = False
            print line,
            print l1
            print l2
            print l3
            print l4
        else:
            print line,

            if mat:
                a = 1
                prtNext = True
Exemple #17
0
def formatSch(sub_sch,tmpDir,copies):
    """Format all .sch files in dir to have incremented refdegs"""

    # change .sch files
    bltup = tuple([0,0,0,0])
    reg = re.compile(regex('stdref',bltup))

    for cop in range(0,copies):
        let = chr(ord('A') + cop)
        subs = r"refdes=\1_%sx\2" % (let)
        file = "%s%s%s" % (tmpDir,sub_sch,cop)

        for line in fileinput.input([file],inplace=True):
            mat = reg.sub(subs,line)
            print mat,
Exemple #18
0
def js_detect(url, r, debug=False):
    """Run the regex matcher over the inline JavaScript of an HTML page.

    The text of every ``<script>`` element without a ``src`` attribute is
    concatenated in document order.  Scripts that have a ``src`` attribute
    are external; they are not fetched and contribute nothing.

    Args:
        url: Not used by this function; kept so existing callers still work.
        r: The HTML markup, parsed with BeautifulSoup's ``html.parser``.
        debug: When true, print a note for each external script seen.

    Returns:
        The result of ``regex().match`` on the concatenated inline script
        text, or an empty list when there is no inline script text.
    """
    soup = bs(r, 'html.parser')
    inline_parts = []
    for script in soup.find_all('script'):
        try:
            script['src']
        except KeyError:
            # No ``src`` attribute: this is an inline script.
            inline_parts.append(script.get_text())
        else:
            if debug:
                print("getting outer js")

    tot_script = "".join(inline_parts)
    if tot_script == "":
        return []
    return regex().match(tot_script)
Exemple #19
0
# -*- encoding: utf-8 -*-

import unicodedata
import regex
from nlp_tools import tokenizer

# Module-wide regex helper; preprocessing() below uses its compiled patterns.
my_regex = regex.regex()


def is_exist(dictionary, element):
    """Return True when ``dictionary[element]`` can be looked up, else False.

    A failed lookup (missing key, index out of range) or an argument that
    cannot be used for the lookup at all (e.g. an unhashable key) yields
    False.  Other exceptions, such as KeyboardInterrupt, are no longer
    swallowed.
    """
    try:
        dictionary[element]
    except (LookupError, TypeError):
        return False
    return True


def preprocessing(data, tokenize=True):
    """NFKC-normalize ``data``, optionally tokenize it, then apply the my_regex patterns.

    Each ``my_regex`` pattern below replaces its matches with an empty
    string, except ``normalize_special_mark``, which re-inserts its
    ``special_mark`` group surrounded by spaces.  The substitutions run in
    the order written, so later patterns see the output of earlier ones.

    Args:
        data: The text to process.
        tokenize: When true (the default), ``tokenizer.predict`` is applied
            right after normalization.

    NOTE(review): no ``return`` statement is visible here, so as written the
    function returns None -- the listing may be cut short; confirm against
    the full file.
    """
    data = unicodedata.normalize('NFKC', data)
    if tokenize:
        data = tokenizer.predict(data)
    data = my_regex.detect_url.sub(u'', data)
    data = my_regex.detect_url2.sub(u'', data)
    data = my_regex.detect_email.sub(u'', data)
    data = my_regex.detect_datetime.sub(u'', data)
    data = my_regex.detect_num.sub(u'', data)
    # Keep the mark itself, padded with one space on each side.
    data = my_regex.normalize_special_mark.sub(u' \g<special_mark> ', data)
    data = my_regex.detect_exception_chars.sub(u'', data)
    data = my_regex.detect_special_mark.sub(u'', data)
    data = my_regex.detect_special_mark2.sub(u'', data)
    data = my_regex.detect_special_mark3.sub(u'', data)
# -*- encoding: utf-8 -*-

import regex
import os, sys
import my_map
import utils
from io import open
import unicodedata
from nlp_tools import tokenizer

# Module-wide regex helper; load_dataset_from_disk() calls its run() method.
r = regex.regex()


def load_dataset_from_disk(dataset):
    list_samples = {k: [] for k in my_map.name2label.keys()}
    stack = os.listdir(dataset)
    print 'loading data in ' + dataset
    while (len(stack) > 0):
        file_name = stack.pop()
        file_path = os.path.join(dataset, file_name)
        if (os.path.isdir(file_path)):
            utils.push_data_to_stack(stack, file_path, file_name)
        else:
            print('\r%s' % (file_path)),
            sys.stdout.flush()
            with open(file_path, 'r', encoding='utf-16') as fp:
                content = unicodedata.normalize('NFKC', fp.read())
                content = r.run(tokenizer.predict(content))
                dir_name = utils.get_dir_name(file_path)
                list_samples[dir_name].append(content)
    print('')
Exemple #21
0
# a simple lexer that tokenizes based on whitespace
from regex import regex

# Character sets written as "|"-separated alternations for the imported
# regex class.  Only `lowercase` is used in the code visible here;
# `uppercase` and `symbols` are defined but not referenced.
lowercase = "a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z"
uppercase = "A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z"
symbols = "+|-|=|.|,|/|%|^|<|>"

# The set of characters a word may be built from.
alphabet = lowercase

# A word: one alphabet character followed by any number of further ones
# (assuming "*" means "zero or more" in this regex class -- TODO confirm).
rule = regex("(%s)(%s)*"%(alphabet, alphabet))
# Pattern is a space followed by " *"; presumably "one or more spaces".
white_space = regex("  *")

def lex(s):
    """Split ``s`` into the words matched by ``rule``, skipping white space.

    Leading white space (as measured by ``white_space.greedy``) is dropped,
    then the longest prefix accepted by ``rule.greedy`` becomes the next
    token; this repeats until the input is used up.

    Args:
        s: The string to tokenize.

    Returns:
        The list of tokens in input order; empty when ``s`` holds no words.

    Raises:
        ValueError: when the remaining input starts with something ``rule``
            cannot match, which would otherwise never make progress.
    """
    tokens = []
    while True:
        s = s[white_space.greedy(s):]
        if not s:
            return tokens
        word_c = rule.greedy(s)
        # A zero-length (or missing) match consumes nothing; stop instead of
        # spinning on the same position forever.
        if not word_c or word_c < 0:
            raise ValueError("cannot lex input starting at %r" % s[:1])
        tokens.append(s[:word_c])
        s = s[word_c:]
    
# Demo run at import time: tokenize a sample string and print the token list.
print lex("abc def ghijk")
Exemple #22
0
def copy(pcb_file,spcb_file,copies,projPath,tmpPath):
    """
        Merge the numbered copies of a sub-PCB into the main PCB file.

        The main file (projPath + pcb_file) is rewritten in place. The copies
        are read from tmpPath + spcb_file + <copy index> for every index in
        range(copies). Nothing is returned.

        Outline:
        *get MinMax pts from pcb and sub
        *generate offsets for all copies
        *open temporary files
        **Elements
        **nets
        *Look in pcb0
        **Skip lines until after attribute line
        **copy elements from all files into elemTemp file
        ***shift coordinates when adding elems to this file
        **copy layer1 lines into lay1temp file
        ***shift coord
        **copy layer2 lines into lay2temp file
        ***shift coord
        **copy Nets to be inserted in existing pcb nets into CopnetTemp
        **copy Nets to be combined with existing pcb nets in ComNetTemp
     """
    blTup = tuple([0,0,0,0])
    pcb_file = "%s%s" % (projPath,pcb_file)
    subpcb_file = "%s%s%s" % (tmpPath,spcb_file,0)

    # getShift's result is used as a pair of sequences indexed by copy number.
    shift = getShift(pcb_file,subpcb_file,copies)
    x_shift = shift[0]
    y_shift = shift[1]

    # Section-detection patterns come from the project's regex() helper.
    # reg_befAt is compiled but not referenced again in this function.
    reg_befAt = re.compile(regex('befAt',blTup))
    reg_ele = re.compile(regex('Ele',blTup))
    reg_lay1 = re.compile(regex('lay1',blTup))
    reg_lay2 = re.compile(regex('lay2',blTup))
    reg_lay3 = re.compile(regex('lay3',blTup))
    reg_line = re.compile(regex('line',blTup))
    reg_netcop = re.compile(regex('net',blTup))
    reg_netcom = re.compile(regex('netpgnd',blTup))

    # Lines collected from all copies, grouped by the section they came from.
    ele = []
    lay1 = []
    lay2 = []
    inNetcop = []
    inNetcom = []

    # Pass 1: read every copy (not in place) and sort its lines into the
    # lists above, shifting coordinates by the copy's offsets.
    for cop in range(copies):
        subpcb_file = "%s%s%s" % (tmpPath,spcb_file,cop)
        state = searchEnum.befAtt # Initialize searching state

        for line in fileinput.input([subpcb_file],inplace = False):
            if state == searchEnum.befAtt:
                # Ignore everything up to the first element header.
                mat = reg_ele.findall(line)
                if mat:
                    mat = mat[0]
                    state = searchEnum.inEle
                    x = int(mat[1]) + x_shift[cop]
                    y = int(mat[2]) + y_shift[cop]
                    line = "Element[%s %s %s %s" % (mat[0],x,y,mat[3])
                    ele.append(line)
            elif state == searchEnum.inEle:
                mat = reg_lay1.findall(line)
                mat_ele = reg_ele.findall(line)
                if mat_ele:
                    # Another element header: rebuild it with shifted x/y.
                    mat_ele = mat_ele[0]
                    state = searchEnum.inEle
                    x = int(mat_ele[1]) + x_shift[cop]
                    y = int(mat_ele[2]) + y_shift[cop]
                    line = "Element[%s %s %s %s" % (mat_ele[0],x,y,mat_ele[3])
                    ele.append(line)
                elif mat:
                    # The layer-1 header itself is not collected.
                    state = searchEnum.inlay1
                else:
                    ele.append(line)
            elif state == searchEnum.inlay1:
                mat = reg_lay2.findall(line)
                matig = re.compile(r"^(\()$").findall(line)
                matig2 = re.compile(r"^(\))$").findall(line)
                if mat:
                    state = searchEnum.inlay2
                elif matig or matig2:
                    # No-op placeholder: lone "(" / ")" lines are dropped.
                    a = 1
                else:
                    # NOTE(review): [0] raises IndexError when a line in this
                    # section does not match reg_line.
                    lnpts = reg_line.findall(line)[0]
                    x0 = int(lnpts[0]) + x_shift[cop]
                    y0 = int(lnpts[1]) + y_shift[cop]
                    x1 = int(lnpts[2]) + x_shift[cop]
                    y1 = int(lnpts[3]) + y_shift[cop]

                    line = "    Line[%s %s %s %s %s\n" % (x0,y0,x1,y1,lnpts[4])
                    lay1.append(line)
            elif state == searchEnum.inlay2:
                mat = reg_lay3.findall(line)
                matig = re.compile(r"^(\()$").findall(line)
                matig2 = re.compile(r"^(\))$").findall(line)
                if mat:
                    state = searchEnum.inlay3
                elif matig or matig2:
                    # No-op placeholder: lone "(" / ")" lines are dropped.
                    a = 1
                else:
                    # NOTE(review): [0] raises IndexError when a line in this
                    # section does not match reg_line.
                    lnpts = reg_line.findall(line)[0]
                    x0 = int(lnpts[0]) + x_shift[cop]
                    y0 = int(lnpts[1]) + y_shift[cop]
                    x1 = int(lnpts[2]) + x_shift[cop]
                    y1 = int(lnpts[3]) + y_shift[cop]

                    line = "    Line[%s %s %s %s %s\n" % (x0,y0,x1,y1,lnpts[4])
                    lay2.append(line)
            elif state == searchEnum.inlay3:
                # Skip lines until the first net header; the reg_netcom
                # pattern takes precedence over reg_netcop.
                mat_cop = reg_netcop.findall(line)
                mat_com = reg_netcom.findall(line)

                if mat_com:
                    inNetcom.append(line)
                    state = searchEnum.inNetcom
                elif mat_cop:
                    inNetcop.append(line)
                    state = searchEnum.inNetcop

            elif state == searchEnum.inNetcom:
                mat_cop = reg_netcop.findall(line)
                mat_com = reg_netcom.findall(line)
                if mat_cop:
                    inNetcop.append(line)
                    state = searchEnum.inNetcop
                else:
                     inNetcom.append(line)
            elif state == searchEnum.inNetcop:
                mat_cop = reg_netcop.findall(line)
                mat_com = reg_netcom.findall(line)
                 
                if mat_com:
                    state = searchEnum.inNetcom
                    inNetcom.append(line)
                elif mat_cop:
                    inNetcop.append(line)
                else:
                    inNetcop.append(line)
    #Remove ending parenthesis
    temp = []
    reg = re.compile(r'^\)')
    for ln in inNetcop:
        mat = reg.findall(ln)
        if not(mat):
            temp.append(ln)
    inNetcop = temp 
    #Format the insert combined nets into a dictionary
    # Keys are the reg_netcom matches of the header lines; body lines seen
    # before any header land under 'none'.
    key = 'none'
    netComDict = {key:[]}
    for ln in inNetcom:
        mat = reg_netcom.findall(ln)
        mat_ign = re.compile(r'\($').findall(ln)
        mat_ign2 = re.compile(r'^\t\)$').findall(ln)
        if mat_ign or mat_ign2:
            # No-op placeholder: parenthesis-only lines are dropped.
            a = 1
        elif mat:
            key = mat[0]
        else:
            try:
                netComDict[key].append(ln)
            except KeyError :
                netComDict[key] = []
                netComDict[key].append(ln)

    #Now move the list vars to the main pcb file
    # The string literal below is inert reference text (list names, the
    # searchEnum layout and the patterns); it is never executed.
    '''
    ele = []
    lay1 = []
    lay2 = []
    inNetcop = []
    inNetcom = []

class searchEnum:
    befAtt = 0 # Search has not yet passed the Attribute line
    inEle = 1 # Search is in the elements section
    inlay1 = 2 # Search is in the layer1 section
    inlay2 = 3 # Search is in the layer2 section
    inlay3 = 4
    inNetcom = 5 # Search is in the nets to be copied section
    inNetcop = 6 # Search is in the nets to be combined section

    reg_befAt = re.compile(regex('befAt',blTup))
    reg_ele = re.compile(regex('Ele',blTup))
    reg_lay1 = re.compile(regex('lay1',blTup))
    reg_lay2 = re.compile(regex('lay2',blTup))
    reg_lay3 = re.compile(regex('lay3',blTup))
    reg_line = re.compile(regex('line',blTup))
    reg_netcop = re.compile(regex('net',blTup))
    reg_netcom = re.compile(regex('netpgnd',blTup))
    '''
    # Pass 2: rewrite the main PCB file in place. With inplace=True every
    # print inside the loop writes into the file; a line that is not printed
    # is removed from it.
    state = searchEnum.befAtt
    for line in fileinput.input([pcb_file],inplace = True):
        if state == searchEnum.befAtt:
            mat = reg_ele.findall(line)
            if mat:
                # Collected elements go in just before the first existing one.
                state = searchEnum.inEle
                for ln in ele:
                    print ln,
                print line,
            else:
                print line,
        elif state == searchEnum.inEle:
            mat = reg_lay1.findall(line)
            if mat:
                # Collected layer-1 lines follow the header and a fresh "(".
                state = searchEnum.inlay1
                print line,
                print "(\n"
                for ln in lay1:
                    print ln,
            else:
                print line,
        elif state == searchEnum.inlay1:
            mat_ign = re.compile(r'^\(').findall(line)
            mat = reg_lay2.findall(line)
            if mat_ign:
                # No-op placeholder: the original "(" line is dropped because
                # one was already written after the header.
                a = 1
            elif mat:
                state = searchEnum.inlay2
                print line,
                print "(\n"
                for ln in lay2:
                    print ln,
            else:
                print line,
        elif state == searchEnum.inlay2:
            mat_ign = re.compile(r'^\(').findall(line)
            mat = reg_lay3.findall(line)
            if mat_ign:
                # No-op placeholder: the original "(" line is dropped.
                a = 1
            elif mat:
                state = searchEnum.inlay3
                print line,
            else:
                print line,
        elif state == searchEnum.inlay3:
            mat_net = re.compile(r'^NetList\(\)').findall(line)
            if mat_net:
                # NOTE(review): searchEnum.inNet is not among the members
                # listed in the reference string above -- confirm it exists
                # on the real class.
                state = searchEnum.inNet
                print line,
                print "(\n"
                for ln in inNetcop:
                    print ln,
            else:
                print line,
        elif state == searchEnum.inNet:
            mat_ign = re.compile(r'^\(').findall(line)
            mat_com = reg_netcom.findall(line)
            if mat_ign:
                # No-op placeholder: the original "(" line is dropped.
                a = 1
            elif mat_com:
                # NOTE(review): raises KeyError when no copy contributed
                # lines for this net name.
                print line,
                print "    (\n"
                for ln in netComDict[mat_com[0]]:
                    print ln,
                state = searchEnum.inNetcop
            else:
                print line,
        elif state == searchEnum.inNetcop:
            # The line read in this state is not written back; presumably it
            # is the net's own opening parenthesis -- TODO confirm.
            state = searchEnum.inNet
#eof
    '''
    print "lay2\n"
    for ln in lay2:
        print ln,
    '''
    '''
    print "netComDict\n"
    for ln in netComDict['PGND']:
        print ln,
    '''
    '''
Exemple #23
0
        return connection, db


# Script entry point: connect to MongoDB, let get_stories collect stories,
# pass each new story through regex().run and the tokenizer, and write one
# lower-cased story per line to vie.txt.
if __name__ == '__main__':
    import io
    from regex import regex
    from tokenizer.tokenizer import Tokenizer

    # Connection parameters are read from the config held by get_stories.
    config = get_stories.config
    connection, db = get_stories.connect2mongo(config['MONGO_HOST'],
                                               config['MONGO_PORT'],
                                               config['MONGO_USER'],
                                               config['MONGO_PASS'],
                                               config['MONGO_DB'])

    stories = get_stories()
    stories.run(db)
    r = regex()
    # Prepare Data
    with io.open('vie.txt', 'w+', encoding="utf-8") as f:
        for story in stories.new_stories:
            reg_text = r.run(story)
            # NOTE(review): Tokenizer is imported above but ViTokenizer is
            # what gets called -- confirm ViTokenizer is imported elsewhere.
            token_text = ViTokenizer.tokenize(reg_text)
            f.write(token_text.lower())
            f.write(u'\n')
        print "Save data success into vie.txt"

    # NOTE(review): `time` and `start_time` are not defined in this block;
    # presumably they are set near the top of the file -- confirm.
    elapsed_time = time.time() - start_time
    print "Total_Time for Excute: ", elapsed_time

    connection.close()