class main:
    """Script-style entry point that prints the top keywords of a webpage.

    All of the work happens in the class body, so it runs exactly once, when
    this ``class`` statement is executed. The URL is taken from
    ``sys.argv[1]`` when present; otherwise it is read interactively.

    A ``ValueError`` raised while obtaining or parsing the URL is printed and
    the process exits.
    """

    try:
        if len(sys.argv) < 2:
            url = raw_input("Please enter URL: ")
            if ' ' in url:
                # Must be a ValueError: that is the only type the handler
                # below catches. The previous bare Exception escaped the
                # handler and surfaced as a traceback.
                raise ValueError("InputError")
        else:
            url = sys.argv[1]
        parseObj = Parse(url)
    except ValueError as e:
        print(e)
        sys.exit()

    title = parseObj.getTitle()
    keyword = parseObj.getKeyword()

    # Only the last 20 content entries and the last 10 header entries are used.
    content = parseObj.getParsedContent()
    content = content[-20:]
    header = parseObj.getParsedHeader(title, keyword)
    header = header[-10:]

    # Merge both bags, sort, then keep the final 8 entries in reverse order.
    # NOTE(review): presumably sortBag sorts ascending by weight so that the
    # reversed tail is "best first" - confirm against sortBag/mergeBag.
    analyzer = mergeBag(header, content, 3)
    analyzer = sortBag(analyzer)
    analyzer = list(analyzer[-8:])
    analyzer.reverse()

    print("\nWebpage: " + url)
    print('\nKeywords:')
    for w in analyzer:
        # Each entry is indexable; its first element is what gets printed.
        print(w[0])
    print('')
def main():
    """Run the interactive query loop of the database management system.

    Prints a banner, performs login/sign-up via ``userLoginSignUp()`` and then
    reads queries until the user types ``quit`` or a query's ``check_query()``
    returns 0. A ``use <name>`` statement selects the database for subsequent
    queries and is followed by reading the next query (without a prompt).
    ``check_query()`` returning -1 is reported as an incorrect query.
    """
    print(colored("\n#####################################################",'green'))
    print(colored(" Welcome to Team-15 DataBase Management System ",'green'))
    print(colored("#####################################################\n",'green'))
    userLoginSignUp()

    query = ""
    database = ""
    queryProcessor = qp.QueryProcessor()
    # Compiled once instead of on every loop iteration.
    use_pattern = re.compile(r'use\s(.*)\s*', re.IGNORECASE)

    while not query.lower() == "quit":
        query = input(constants.InputQuery)

        # Switch databases only when the USE pattern actually matches. The old
        # substring test ("use" in query) also fired for queries such as
        # "select * from users" and then crashed with IndexError on db_raw[0].
        db_raw = use_pattern.findall(query)
        if db_raw:
            Parse.Parse.newDB = True
            database = db_raw[0]
            query = input()

        query_type = Parse.Parse(database, query, queryProcessor)
        val = query_type.check_query()
        print("\n#####################################################")
        if val == -1:
            print(colored("Incorrect Query",'red'))
        elif val == 0:
            break

    print("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~Thanks!~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
def __init__(self):
    """Create the storage backend, the Chrome options and the page parser."""
    self.__stroage = Storeage.Stroage()

    options = webdriver.ChromeOptions()
    self.__chrome_options = options
    # options.add_argument('--proxy-server=%s' % PROXY)
    # Content-setting value 2 for images.
    # NOTE(review): in Chrome this value means "block", i.e. pages load
    # without images - confirm that is the intent.
    image_prefs = {'profile.default_content_setting_values.images': 2}
    options.add_experimental_option("prefs", image_prefs)

    self.__parse = Parse.Parse()
def parse_document(self):
    """Parse ``self.content`` and copy the parser's results onto this object.

    Sets ``length`` (from the parser's ``position``), ``max_tf``, ``docno``,
    ``terms`` and finally ``num_of_words`` (the number of entries in
    ``terms``).
    """
    doc_parser = Parse.Parse(self.constants, self.stop_words, self.is_stemming)
    doc_parser.parse_document(self.content)

    # (attribute on self, attribute on the parser), copied in this order.
    copied_fields = (
        ("length", "position"),
        ("max_tf", "max_tf"),
        ("docno", "docno"),
        ("terms", "terms"),
    )
    for own_name, parser_name in copied_fields:
        setattr(self, own_name, getattr(doc_parser, parser_name))

    # Drop the local reference to the parser once its results are copied.
    del doc_parser
    self.num_of_words = len(self.terms)
def ReadFromFile(fileName):
    """Read a system of equations from *fileName* and run the requested solver(s).

    File layout, as consumed here:
      * line 1: the number of equations ``n``;
      * line 2: the method name;
      * the next ``n`` lines: one equation per line (kept with their line
        endings, exactly as read);
      * the remainder (only read for ``seidel`` / ``All``): whitespace
        separated integer initial guesses.

    Only the methods ``"All"`` and ``"seidel"`` trigger any solver call; any
    other method name results in the file being read and nothing else.

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError: if ``n`` or an initial guess is not an integer.
    """
    # The context manager guarantees the handle is closed even when parsing
    # a number fails; previously the file was never closed at all.
    with open(fileName, 'r') as fs:
        n = int(fs.readline())
        method = fs.readline().rstrip('\n')
        equations = [fs.readline() for _ in range(n)]

        IntialGuesses = []
        if method == 'seidel' or method == "All":
            # split() without an argument tolerates newlines, repeated blanks
            # and an empty remainder, all of which broke split(" ") + int().
            IntialGuesses = [int(token) for token in fs.read().split()]

    if method == "All":
        Parse.Parse(equations, "LU", n, [], 0, 0)
        Parse.Parse(equations, "Gaussian-jordan", n, [], 0, 0)
        Parse.Parse(equations, "Gaussian-elimination", n, [], 0, 0)
        Parse.Parse(equations, "seidel", n, IntialGuesses, 50, 0.00001)
    if method == 'seidel':
        Parse.Parse(equations, method, n, IntialGuesses, 50, 0.00001)
def beginTransaction(self):
    """Read and execute queries interactively until the transaction ends.

    Each line read from standard input is handed to ``prs.Parse`` together
    with this object's database and query processor. The loop stops when the
    user enters ``quit`` (any letter case) or ``check_query()`` returns 0; a
    return value of -1 is reported as an incorrect query.
    """
    divider = "\n--------------------------------------------------------"
    print(divider)
    print("tranasction started")

    query = ""
    while query.lower() != "quit":
        query = input()
        status = prs.Parse(self.database, query, self.queryProcessor).check_query()
        if status == 0:
            break
        if status == -1:
            print(colored("Incorrect Query", 'red'))

    print("transaction ended")
    print(divider)
def parse_from_file(filename, extension, format, scale):
    """Load points from ``<filename>.txt`` and look up their colors.

    Every line of the text file is converted with ``make_tuple``; the
    resulting points are passed to ``Parse.Parse(filename, extension)``'s
    ``get_all_colors``.

    Args:
        filename: path without extension; ``".txt"`` is appended for the
            points file, and the bare name is given to the parser.
        extension: extension handed to ``Parse.Parse``.
        format: unused by this function.
        scale: unused by this function.

    Returns:
        ``(width, height, color_points)`` on success, or ``(0, 0, [])`` after
        printing an error message when an ``IOError`` occurs.
    """
    text_path = filename + ".txt"
    try:
        # "with" closes the handle even if reading or make_tuple raises;
        # the explicit close() used before was skipped on any error.
        with open(text_path, "r") as file:
            points = [make_tuple(line) for line in file]
        parser = Parse.Parse(filename, extension)
        color_points = parser.get_all_colors(points)
        return parser.width, parser.height, color_points
    except IOError:
        print("Error opening text file " + text_path)
        return 0, 0, []
def parse_image(filename, extension, pass_number, threshold, precision,
                min_size, max_size, threads):
    """Open an image through ``Parse.Parse`` and evaluate it.

    When the parser reports that the file is opened, the given options are
    assigned to it and ``evaluate_image()`` is called; otherwise nothing
    happens. The function returns ``None`` in both cases.
    """
    image_parser = Parse.Parse(filename, extension)
    if not image_parser.is_opened():
        return

    # Configure the parser before running it.
    image_parser.threshold = threshold
    image_parser.precision = precision
    image_parser.minimum_size = min_size
    image_parser.maximum_size = max_size
    image_parser.num_threads = threads
    image_parser.pass_number = pass_number

    image_parser.evaluate_image()
def main1():
    """Run one fixed follow-up parse on each follow-up question and print it.

    Questions are loaded from ``data/nt-13588_2.tsv``. For every question
    whose sequence id does not end in ``'0'``, a ``Parse`` of type
    ``Parse.FollowUp`` with the single condition
    ``Condition(3, Condition.OpEqRow, 7)`` is run against the result info of
    the preceding question; the question, its gold answer coordinates and the
    predicted coordinates are written to standard output.
    """
    questions = util.get_labeled_questions(str("data/nt-13588_2.tsv"), "data")
    out = sys.stdout

    def render(coords):
        """Format an iterable of (row, col) pairs as "(r,c), (r,c), ..."."""
        return ", ".join(["(%d,%d)" % pair for pair in coords])

    for qinfo in questions:
        if qinfo.seq_qid[-1] != '0':
            # `resinfo` is only bound at the end of an iteration, so the very
            # first question must be a non-follow-up (id ending in '0').
            followup = Parse()
            followup.type = Parse.FollowUp
            followup.conditions = [Condition(3, Condition.OpEqRow, 7)]
            pred = followup.run(qinfo, resinfo)

            out.write("(%s) %s\n" % (qinfo.seq_qid, qinfo.question))
            out.write("Answer: %s\n" % render(qinfo.answer_coordinates))
            out.write("Predictions: %s\n" % render(pred))
            out.write("\n")
            out.flush()

        # use the gold answers
        resinfo = util.ResultInfo(qinfo.seq_qid, qinfo.question,
                                  qinfo.ques_word_sequence,
                                  qinfo.answer_coordinates,
                                  qinfo.answer_column_idx)
def work(self):
    """Grade ``TrophyShelf.countVisible`` against the cases in the "input" file.

    For every test case read through ``Parse.Parse("input")`` the method under
    test is run in a ``StoppableThread`` with a ``_RUNSECS`` time limit, and
    its result is checked with ``p.matchIntList`` / ``p.ok()``. An HTML report
    is written to ``results.html`` and the fraction of passed cases to
    ``perc``.

    ``count`` is incremented before each read, so after the loop the number
    of executed cases is ``count - 1``.
    """
    printer = open("results.html", "w")
    percPrinter = open("perc", "w")
    sprinter = StringIO.StringIO()
    printer.write('<html>\n<head>\n')
    printer.write(
        '<link rel="stylesheet" type="text/css" href="topstyle.css">')
    printer.write('\n<body>')
    printer.write("\n<table class=border>\n")
    correctCount = 0
    count = 0
    p = Parse.Parse("input")
    try:
        while True:
            count += 1
            a = p.parseVI()
            if a is None:
                # No more test cases.
                break
            self.done = True
            self.doprint(printer, sprinter, _TRSTART + str(count) + _TDEND)
            # Per-case state, filled in by inner() running on the thread.
            self.resultReturned = False
            self.runtime = False
            self.typeOk = True
            self.eobj = None
            self.result = None

            def inner():
                # Runs the submission on a copy so it cannot alter the input.
                try:
                    self.result = TrophyShelf.countVisible(copy.copy(a))
                    self.resultReturned = True
                except:
                    self.runtime = True
                    self.eobj = sys.exc_info()[0:2]

            #result = CLASS.METHOD(ARGCALL)
            try:
                t = StoppableThread.StoppableThread(target=inner)
                t.start()
                t.join(_RUNSECS)
                if t.isAlive():
                    # Still running after the time limit: stop it and mark
                    # the case as not done (reported as a time-out below).
                    t.terminate()
                    t.join()
                    self.runtime = False
                    self.done = False
            except:
                self.done = False

            self.typeOk = type(self.result) == types.ListType
            expected = p.matchIntList(self.result)
            if not self.done or not self.resultReturned or not self.typeOk:
                self.doprint(printer, sprinter, _TDFAIL)
                if self.runtime:
                    sprinter.write('<TD>runtime exception:')
                    #sprinter.write(' exception: '+str(self.eobj[0]))
                    traceback.print_exception(self.eobj[0], self.eobj[1],
                                              None, None, sprinter)
                elif not self.resultReturned:
                    sprinter.write('<TD>time limit exceeded:')
                elif not self.typeOk:
                    sprinter.write("<TD>wrong return type: got <xmp>" +
                                   str(type(self.result)) + "</xmp>")
                    sprinter.write("expected <xmp>" + str(types.ListType) +
                                   "</xmp>")
                else:
                    sprinter.write('<TD>strange error, contact professor:')
                sprinter.write(str(a) + ' ')
                self.doprint(printer, sprinter, _TDEND)
            elif not p.ok():
                printer.write(_TDFAIL + '</TR>\n')
                sprinter.write(_TDFAIL + '<TD>expected<PRE> ')
                sprinter.write(str(expected))
                sprinter.write('</PRE>got<BR><PRE>')
                sprinter.write(str(self.result))
                sprinter.write('</PRE>: ')
                sprinter.write(str(a) + ' ')
                sprinter.write(_TDEND + '\n')
            else:
                correctCount += 1
                printer.write(_TDPASS + "\n")
                sprinter.write('<TD class=pass>pass</TD><TD>')
                sprinter.write('got<br><PRE> ')
                sprinter.write(str(self.result))
                sprinter.write('</PRE>: ')
                sprinter.write(str(a) + ' ')
                sprinter.write(_TDEND + '\n')
    except:
        # Deliberately best-effort: whatever goes wrong while grading, the
        # summary in the finally block below must still be written.
        pass
    finally:
        if count <= 1:
            perc = 0.0
        else:
            perc = correctCount * 1.0 / (count - 1)
        line = '<!--PERC:%1.4f --><P>' % (perc)
        printer.write(line + '\n')
        line = '%1.5f' % (perc)
        percPrinter.write(line + '\n')
        if correctCount == count - 1:
            # The terminator must be "-->"; the former "->" left the HTML
            # comment open, so the all-pass marker swallowed the text after it.
            printer.write('<!-- ' + _ALLPASS + ' --><P>\n')
        printer.write('# of correct: ' + str(correctCount))
        printer.write(' out of ' + str(count - 1) + '\n')
        printer.write(sprinter.getvalue() + '\n')
        printer.write('</table>\n</body>\n</html>\n')
        sprinter.close()
        printer.close()
        percPrinter.close()
import sys

import Tokenize
import Parse

# Tokenize and parse the file named by the first command-line argument, then
# print every resulting line as comma-separated fields followed by the
# expansion map. Without an argument, complain on stderr.
if len(sys.argv) <= 1:
    sys.stderr.write("Error: An argument is required\n")
else:
    lines, expansionmap = Parse.Parse(Tokenize.Tokenize(sys.argv[1]))
    if lines is not None:
        for line in lines:
            print(','.join(line))
        print("expansionmap is %s" % (expansionmap,))
def lex_and_parse(str_in):
    """Run ``Lex.Lex`` on *str_in* and return ``Parse.Parse`` of its output."""
    return Parse.Parse(Lex.Lex(str_in))
def runQuery(self, ugeoip, urdapip, urdapentity, uusrips, output):
    """Print (or write to a file) the stored GeoIP and RDAP data for IPs.

    This is Python 2 code (print statement, dict.has_key).

    ugeoip      -- GeoIP field names to show; ['*'] selects every field
                   listed in self.geocfg['field'].
    urdapip     -- RDAP IP field names to show; ['*'] selects every field
                   listed in self.rdapcfg['ipfield'].
    urdapentity -- RDAP contact field names to show; ['*'] selects every
                   field listed in self.rdapcfg['entityfield'].
    uusrips     -- IPs to report on; ['*'] selects all of self.iplist.
    output      -- "" prints to the screen; anything else is handed to
                   Parse(output), whose getOutFilePtr() result is written to
                   and closed at the end.

    Fields that are absent for an IP / contact are skipped silently. The ip
    itself is looked up directly in self.geoipdb, self.rdapipdb and
    self.rdapcontactdb, so an IP missing there raises KeyError.
    """
    ## output to file or screen?
    outPtr = None
    if output != "":
        outf = Parse(output)
        outPtr = outf.getOutFilePtr()

    # Resolve the list of IPs to report on.
    queryforIPs = []
    if len(uusrips) == 1 and uusrips[0] == '*':
        queryforIPs = self.iplist
    else:
        queryforIPs = uusrips

    for ip in queryforIPs:
        # Per-IP banner.
        if (outPtr != None):
            outPtr.write("\n@@@@@@@@@@@@@@@@@@ [" + ip + "] @@@@@@@@@@@@@@@@@@\n")
        else:
            print "\n@@@@@@@@@@@@@@@@@@ [" + ip + "] @@@@@@@@@@@@@@@@@@\n"

        ## run GeoIP info
        if (outPtr != None):
            outPtr.write("[ GEO IP ]\n")
        else:
            print "[ GEO IP ]"
        if len(ugeoip) == 1 and ugeoip[0] == '*':
            for item in self.geocfg['field']:
                if (self.geoipdb[ip].has_key(item)):
                    if (outPtr != None):
                        # File output drops non-ASCII characters; screen
                        # output below prints the value unchanged.
                        outstr = "%s%s%s" % ('{0:30} => '.format(item), self.geoipdb[ip][item], "\n")
                        outstr = outstr.encode('ascii', 'ignore')
                        outPtr.write(outstr)
                    else:
                        print '{0:30} => '.format(item), self.geoipdb[ip][item]
        else:
            for item in ugeoip:
                if (self.geoipdb[ip].has_key(item)):
                    if (outPtr != None):
                        outstr = "%s%s%s" % ('{0:30} => '.format(item), self.geoipdb[ip][item], "\n")
                        outstr = outstr.encode('ascii', 'ignore')
                        outPtr.write(outstr)
                    else:
                        print '{0:30} => '.format(item), self.geoipdb[ip][item]

        ## run RDAP IP info
        if (outPtr != None):
            outPtr.write("[ RDAP IP ]\n")
        else:
            print "[ RDAP IP ]"
        if len(urdapip) == 1 and urdapip[0] == '*':
            for item in self.rdapcfg['ipfield']:
                if (self.rdapipdb[ip].has_key(item)):
                    if (outPtr != None):
                        outstr = "%s%s%s" % ('{0:30} => '.format(item), self.rdapipdb[ip][item], "\n")
                        outstr = outstr.encode('ascii', 'ignore')
                        outPtr.write(outstr)
                    else:
                        print '{0:30} => '.format(item), self.rdapipdb[ip][item]
        else:
            for item in urdapip:
                if (self.rdapipdb[ip].has_key(item)):
                    if (outPtr != None):
                        outstr = "%s%s%s" % ('{0:30} => '.format(item), self.rdapipdb[ip][item], "\n")
                        outstr = outstr.encode('ascii', 'ignore')
                        outPtr.write(outstr)
                    else:
                        print '{0:30} => '.format(item), self.rdapipdb[ip][item]

        ## run RDAP Contact (entities) info
        if (outPtr != None):
            outPtr.write("[ RDAP Contact ]\n")
        else:
            print "[ RDAP Contact ]"
        for contact in self.rdapcontactdb[ip]:
            # Contact header: the ROLES value, or NULL when building the
            # header fails for any reason (bare except).
            try:
                if (outPtr != None):
                    outPtr.write("[- " + contact['ROLES'] + " -]\n")
                else:
                    print "[- " + contact['ROLES'] + " -]"
            except:
                if (outPtr != None):
                    outPtr.write("[- NULL -]\n")
                else:
                    print "[- NULL -]"
            if len(urdapentity) == 1 and urdapentity[0] == '*':
                for item in self.rdapcfg['entityfield']:
                    if (contact.has_key(item)):
                        if (outPtr != None):
                            outstr = "%s%s%s" % ('{0:30} => '.format(item), contact[item], "\n")
                            outstr = outstr.encode('ascii', 'ignore')
                            outPtr.write(outstr)
                        else:
                            print '{0:30} => '.format(item), contact[item]
            else:
                for item in urdapentity:
                    if (contact.has_key(item)):
                        if (outPtr != None):
                            outstr = "%s%s%s" % ('{0:30} => '.format(item), contact[item], "\n")
                            outstr = outstr.encode('ascii', 'ignore')
                            outPtr.write(outstr)
                        else:
                            print '{0:30} => '.format(item), contact[item]

    # Close the output file only after every IP has been reported.
    if (outPtr != None):
        outPtr.close()
def work(self):
    """Grade ``Grayscale.convert`` against the cases in the "input" file.

    Each test case consists of three values read with ``p.getInt()``. The
    method under test runs in a ``StoppableThread`` limited to ``_RUNSECS``
    and its result is checked with ``p.matchDouble`` / ``p.ok()``. A report is
    written to the file ``results`` and the fraction of passed cases to
    ``perc``.

    ``count`` is incremented before each read, so after the loop the number
    of executed cases is ``count - 1``.
    """
    printer = open("results", "w")
    percPrinter = open("perc", "w")
    # Per-case rows are buffered here and appended to the report at the end.
    sprinter = StringIO.StringIO()
    correctCount = 0
    count = 0
    p = Parse.Parse("input")
    try:
        while True:
            count += 1
            a, b, c = p.getInt(), p.getInt(), p.getInt()
            if a == None:
                # No more test cases.
                break
            self.done = True
            # self.doprint(printer,sprinter,_TRSTART+str(count)+_TDEND)
            sprinter.write(_TRSTART + str(count) + _TDEND)
            # Per-case state, filled in by inner() running on the thread.
            self.resultReturned = False
            self.runtime = False
            self.typeOk = True
            self.eobj = None
            self.result = None

            def inner():
                # Runs the submission on copies of the arguments and records
                # either its result or the exception it raised.
                try:
                    self.result = Grayscale.convert(
                        copy.copy(a), copy.copy(b), copy.copy(c))
                    self.resultReturned = True
                except:
                    self.runtime = True
                    self.eobj = sys.exc_info()[0:2]

            #result = CLASS.METHOD(ARGCALL)
            try:
                t = StoppableThread.StoppableThread(target=inner)
                t.start()
                t.join(_RUNSECS)
                if t.isAlive():
                    # Still running after the time limit: stop it and mark
                    # the case as not done (reported as a time-out below).
                    t.terminate()
                    t.join()
                    self.runtime = False
                    self.done = False
            except:
                self.done = False

            self.typeOk = type(self.result) == types.FloatType
            expected = p.matchDouble(self.result)
            if not self.done or not self.resultReturned or not self.typeOk:
                # Failure without a comparable result: exception, time-out or
                # wrong return type.
                self.doprint(printer, sprinter, _TDFAIL)
                if self.runtime:
                    sprinter.write('<TD>runtime exception:')
                    #sprinter.write(' exception: '+str(self.eobj[0]))
                    traceback.print_exception(self.eobj[0], self.eobj[1],
                                              None, None, sprinter)
                elif not self.resultReturned:
                    sprinter.write('<TD>time limit exceeded:')
                elif not self.typeOk:
                    sprinter.write("<TD>wrong return type: got <xmp>" +
                                   str(type(self.result)) + "</xmp>")
                    sprinter.write("expected <xmp>" + str(types.FloatType) +
                                   "</xmp>")
                else:
                    sprinter.write('<TD>strange error, contact professor:')
                sprinter.write(str(a) + ' ')
                sprinter.write(str(b) + ' ')
                sprinter.write(str(c) + ' ')
                self.doprint(printer, sprinter, _TDEND)
            elif not p.ok():
                # A result of the right type came back but did not match.
                # printer.write(_TDFAIL+'</TR>\n')
                sprinter.write('<TD class="fail outcome">fail</TD>' +
                               '<TD>expected<PRE> ')
                sprinter.write(str(expected))
                sprinter.write('</PRE>got<BR><PRE>')
                sprinter.write(str(self.result))
                sprinter.write('</PRE>: ')
                sprinter.write(str(a) + ' ')
                sprinter.write(str(b) + ' ')
                sprinter.write(str(c) + ' ')
                sprinter.write(_TDEND + '\n')
            else:
                correctCount += 1
                # printer.write(_TDPASS+"\n")
                sprinter.write('<TD class="pass outcome">pass</TD><TD>')
                sprinter.write('got<br><PRE> ')
                sprinter.write(str(self.result))
                sprinter.write('</PRE>: ')
                sprinter.write(str(a) + ' ')
                sprinter.write(str(b) + ' ')
                sprinter.write(str(c) + ' ')
                sprinter.write(_TDEND + '\n')
    except:
        # Any error while grading is swallowed; the summary below is still
        # written from whatever was counted so far.
        pass
    finally:
        if count <= 1:
            perc = 0.0
        else:
            perc = correctCount * 1.0 / (count - 1)
        line = '<!--PERC:%1.4f --><P>' % (perc)
        printer.write(line + '\n')
        line = '%1.5f' % (perc)
        percPrinter.write(line + '\n')
        if correctCount == count - 1:
            printer.write('<!-- ' + _ALLPASS + ' --><P>\n')
        printer.write('# of correct: ' + str(correctCount))
        printer.write(' out of ' + str(count - 1) + '\n')
        printer.write(sprinter.getvalue() + '\n')
        sprinter.close()
        printer.close()
        percPrinter.close()
def collect(self):
    """Run the solver(s) selected in the UI on the entered equations.

    The selection ``self.v.get()`` maps to: 3 = LU, 2 = Gaussian-jordan,
    1 = Gaussian-elimination, 4 = seidel, 5 = all four in turn. Each solver
    call is timed with ``time.perf_counter`` and the duration is stored in
    the module globals ``elapsed_time`` (single method) or
    ``elapsed_time1`` .. ``elapsed_time4`` (all methods); afterwards
    ``self.PassDataToPage1()`` is called.

    Only the LU and Gaussian-elimination selections store the solver's
    return value on ``self``.
    """
    global elapsed_time
    global elapsed_time1
    global elapsed_time2
    global elapsed_time3
    global elapsed_time4

    self.results = []
    self.string = ''
    imax = 50
    ea = 0.00001
    equations = [entry.get() for entry in self.namR]
    temp = [entry.get() for entry in self.guess]

    def timed(method, guesses, fifth, sixth):
        """Call Parse.Parse for *method*; return (its result, seconds taken)."""
        began = time.perf_counter()
        outcome = Parse.Parse(equations, method, self.NumberOfEquations,
                              guesses, fifth, sixth)
        return outcome, time.perf_counter() - began

    if self.v.get() == 3:
        outcome, seconds = timed("LU", [], 0, 0)
        self.results, self.string = outcome
        elapsed_time = seconds
        self.PassDataToPage1()

    if self.v.get() == 2:
        elapsed_time = timed("Gaussian-jordan", [], 0, 0)[1]
        self.PassDataToPage1()

    if self.v.get() == 1:
        self.results, elapsed_time = timed("Gaussian-elimination", [], 0, 0)
        self.PassDataToPage1()

    if self.v.get() == 4:
        # Optional overrides of the defaults from the two entry fields.
        if self.expression_field_1.get() != '':
            ea = float(self.expression_field_1.get())
        if self.expression_field_2.get() != '':
            imax = int(self.expression_field_2.get())
        IntialGuesses = [int(numeric_string) for numeric_string in temp]
        # NOTE(review): the last two arguments are passed as (ea, imax) here,
        # while ReadFromFile passes (50, 0.00001), i.e. the opposite order -
        # confirm against the signature of Parse.Parse.
        elapsed_time = timed("seidel", IntialGuesses, ea, imax)[1]
        self.PassDataToPage1()

    if self.v.get() == 5:
        if self.expression_field_1.get() != '':
            ea = float(self.expression_field_1.get())
        if self.expression_field_2.get() != '':
            imax = int(self.expression_field_2.get())
        IntialGuesses = [int(numeric_string) for numeric_string in temp]
        elapsed_time1 = timed("LU", [], 0, 0)[1]
        elapsed_time2 = timed("Gaussian-jordan", [], 0, 0)[1]
        elapsed_time3 = timed("Gaussian-elimination", [], 0, 0)[1]
        elapsed_time4 = timed("seidel", IntialGuesses, ea, imax)[1]
        self.PassDataToPage1()
break def new_task_test(q): r = redis.Redis(host="localhost", port=6379) while True: a = q.get() print("下一个连接连接", a) r.sadd("youxinnew_master_tasks", a) if __name__ == '__main__': spider_name = "youxin" rq = Req.Req(spider_name) r1 = Url_Manager.UrlMana(spider_name) p_mas = Parse.Parse() html_queue = mp.Queue() item_queue = mp.Queue() new_task_queue = mp.Queue() r1.init_task(create_start_urls()) tasks = r1.task_create_filter(200) print(tasks) p1 = mp.Process(target=rq.cun, args=(tasks, html_queue)) p2 = mp.Process(target=get_html, args=(html_queue, item_queue, p_mas.master_parse, new_task_queue)) p3 = mp.Process(target=get_item, args=(item_queue, new_task_queue)) p4 = mp.Process(target=new_task_test, args=(new_task_queue, )) p1.start() p2.start() p3.start()
def processPaper(paperId): content = cr.crawlPaperMain(paperId) soup = BeautifulSoup(content) paper = pr.getPaperInfo(soup) paper.id = paperId outPut_sql(paper) def processUser(userId): content = cr.crawlAuthorPub(userId) pr.parseAuthorPub(content) cr = Crawler.Crawler() pr = Parse.Parse() sql = SQLConn.MysqlUti() Init(pr, sql) pr.testPaper('2488441') paperId = sql.getPaper() while ((paperId != None) or (sql.getUser() != None)): while paperId != None: while (checkTime() == False): print('sleeping... ') time.sleep(1.5 * 3600) print(paperId) try: processPaper(paperId)
def action_history_to_parse(self, act_idxs):
    """Convert a sequence of action indices into a ``Parse`` object.

    ``act_idxs`` holds indices into ``self.actions``. The actions are walked
    in order:

    * ``Select`` marks the parse as ``Parse.Independent`` and records the
      selected column.
    * ``WhereCol`` / ``FpWhereCol`` consume the *next* action as the
      operator (and argument) of a condition on that column, unless that
      next action is ``Stop``. ``FpWhereCol`` additionally marks the parse as
      ``Parse.FollowUp``.
    * ``SameAsPrevious`` marks the parse as ``Parse.FollowUp``.
    * Anything else must be ``Start`` or ``Stop`` (asserted).

    Returns the populated ``Parse``.
    """
    #print("act_idxs", act_idxs)
    parse = Parse()
    p, act_history_length = 0, len(act_idxs)
    while p < act_history_length:
        act_idx = act_idxs[p]
        act = self.actions[act_idx]
        if act.type == ActionType.Select:
            # having SELECT meaning it's an independent parse
            parse.type = Parse.Independent
            parse.select_columns.append(act.col)  # record the column it selects
        elif act.type == ActionType.WhereCol:
            col = act.col
            # have to assume that the follow-up action is about the operator and argument
            # (this lookahead raises IndexError if WhereCol is the last action)
            p += 1
            #print("p =", p)
            act_idx = act_idxs[p]
            act = self.actions[act_idx]
            if act.type != ActionType.Stop:
                # This is the only legitimate type after WhereCol currently. Will have to expand the coverage later for more action types.
                assert (act.type in ActionType.WhereConditions), "Illegit action type after WhereCol: %d" % act.type
                # NOTE(review): `cond` is only bound by a matching branch
                # below; a type in WhereConditions that is missing from this
                # chain would append a stale or undefined `cond`.
                if act.type == ActionType.CondEqRow:
                    cond = Condition(col, Condition.OpEqRow, act.row)
                elif act.type == ActionType.CondNeRow:
                    cond = Condition(col, Condition.OpNeRow, act.row)
                elif act.type == ActionType.CondGT:
                    cond = Condition(col, Condition.OpGT, act.val[1])
                elif act.type == ActionType.CondGE:
                    cond = Condition(col, Condition.OpGE, act.val[1])
                elif act.type == ActionType.CondLT:
                    cond = Condition(col, Condition.OpLT, act.val[1])
                elif act.type == ActionType.CondLE:
                    cond = Condition(col, Condition.OpLE, act.val[1])
                elif act.type == ActionType.ArgMin:
                    cond = Condition(col, Condition.OpArgMin)
                elif act.type == ActionType.ArgMax:
                    cond = Condition(col, Condition.OpArgMax)
                parse.conditions.append(cond)
        elif act.type == ActionType.SameAsPrevious:
            parse.type = Parse.FollowUp
        elif act.type == ActionType.FpWhereCol:
            parse.type = Parse.FollowUp
            col = act.col
            # have to assume that the follow-up action is about the operator and argument
            # (this lookahead raises IndexError if FpWhereCol is the last action)
            p += 1
            act_idx = act_idxs[p]
            act = self.actions[act_idx]
            if act.type != ActionType.Stop:
                # This is the only legitimate type after WhereCol currently. Will have to expand the coverage later for more action types.
                assert (act.type in ActionType.FpWhereConditions), "Illegit action type after FpWhereCol: %d" % act.type
                # The same `cond` caveat as in the WhereCol branch applies.
                if act.type == ActionType.FpCondEqRow:
                    cond = Condition(col, Condition.OpEqRow, act.row)
                elif act.type == ActionType.FpCondNeRow:
                    cond = Condition(col, Condition.OpNeRow, act.row)
                elif act.type == ActionType.FpCondGT:
                    cond = Condition(col, Condition.OpGT, act.val[1])
                elif act.type == ActionType.FpCondGE:
                    cond = Condition(col, Condition.OpGE, act.val[1])
                elif act.type == ActionType.FpCondLT:
                    cond = Condition(col, Condition.OpLT, act.val[1])
                elif act.type == ActionType.FpCondLE:
                    cond = Condition(col, Condition.OpLE, act.val[1])
                elif act.type == ActionType.FpArgMin:
                    cond = Condition(col, Condition.OpArgMin)
                elif act.type == ActionType.FpArgMax:
                    cond = Condition(col, Condition.OpArgMax)
                parse.conditions.append(cond)
        else:
            assert (act.type == ActionType.Start or act.type == ActionType.Stop), "Unknown action type: %d" % act.type
        # Advance past the action just handled (the lookahead above already
        # stepped over the operator action of a WhereCol/FpWhereCol pair).
        p += 1
    return parse