def fetchTweetsByUser(userName):
    global f
    getAPIConnection()
    max_tweets = 100
    count = 0
    userID = userName
    global result
    userTimeline = api.user_timeline(userID)
    for tweet in userTimeline:
        temp = tweet.text
        # Removal of unwanted characters.
        linkRemoval = re.sub(r"http\S+", "", temp)
        hashRemoval = re.sub(r"#", "", linkRemoval)
        # ASCII encode/decode drops every character >= 127.
        utfRemoval = hashRemoval.encode('ascii', 'ignore').decode('ascii')
        result = re.sub(r"@\S+", "", utfRemoval)
        print("--------")
        getList()
        count += 1
    getCSV()
    f.close()
    an.analyzer()
    os.remove("./data/dtweets.csv")
    res = getAnalysis()
    os.remove("./data/danalysis.csv")
    return res
def fetchTweetsByHash(hash):
    maxTweets = 100
    getAPIConnection()  # Requesting API connection.
    global api
    searchQuery = hash
    langPref = "en"
    count = 0
    global result
    tweetResFinal = tweepy.Cursor(api.search, q=searchQuery, lang=langPref,
                                  tweet_mode="extended")
    for tweet in tweetResFinal.items(maxTweets):
        temp = tweet.full_text
        # Removal of unwanted characters.
        linkRemoval = re.sub(r"http\S+", "", temp)
        hashRemoval = re.sub(r"#\S+", "", linkRemoval)
        # ASCII encode/decode drops every character >= 127.
        utfRemoval = hashRemoval.encode('ascii', 'ignore').decode('ascii')
        result = re.sub(r"@\S+", "", utfRemoval)
        print('-----')
        getList()
        count += 1
    getCSV()
    global f
    f.close()
    an.analyzer()
    os.remove("./data/dtweets.csv")
    res = getAnalysis()
    os.remove("./data/danalysis.csv")
    return res
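# The two fetch functions above share the same text clean-up steps; a minimal
# standalone sketch of that pipeline with the tweepy/Twitter specifics stripped
# away (the helper name clean_tweet_text is illustrative, not from the original
# code):
import re

def clean_tweet_text(text):
    """Strip links, hashtags, @mentions, and non-ASCII characters from a tweet."""
    text = re.sub(r"http\S+", "", text)                     # links
    text = re.sub(r"#\S+", "", text)                        # hashtags
    text = re.sub(r"@\S+", "", text)                        # @mentions
    return text.encode('ascii', 'ignore').decode('ascii')   # non-ASCII

print(clean_tweet_text("Loving #opensource https://example.com cc @someone"))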
def analysis(self, from_file=False, from_text=False):
    temp = None
    if from_file:
        if self.file_name == "":
            mb.showerror("Error", "No text file was specified.")
        else:
            temp = analyzer.analyzer(file=self.file_name)
    elif from_text:
        temp = analyzer.analyzer(text=self.text)
    elif (from_text is None) and (from_file is None):
        mb.showerror("Error", "No text file was specified / no text was entered.")
    if temp is not None:
        temp.start()
        return temp.gui_otput()
def test():
    anal = analyzer()
    fname = "pages/141130/141130_10:42:18.html"
    data = codecs.open(fname, 'r', 'utf8').read()
    pl = anal.list_papers(data)
    for i in range(len(pl)):
        if i != 12:
            continue
        print unicode(anal.extract_c(pl[i]))
def main():
    # Read in all of the ids and item names.
    item_ids_and_names = read_item_ids()

    # Fetch the pricing for all of the ids from the OSRS GE API.
    price_fetch = price_fetcher()
    item_ids_with_pricing = price_fetch.fetch_id_prices(item_ids_and_names)

    # Compile the fetched pricing data into a usable CSV.
    analyzer_object = analyzer()
    analyzer_object.analyze(item_ids_with_pricing, item_ids_and_names)
def run():
    """
    Manager thread that supervises the listener (the thread that listens for
    Twitter data and sends it to our database) and the db_handler, the thread
    that cleans up our database after every cycle so we limit the amount of
    memory stored in it. Check out their docs for more details.
    :return: None
    """
    # Pre-initialization
    my_db_handler = dbh.Handler('cryptweets_test', 'lfvarela')
    i = 0
    while True:
        table_name = 'tweets_day_' + str(i)
        print('Main: creating table ' + str(table_name))
        with my_db_handler:
            my_db_handler.create_table(table_name, tweet_table_template)

        listener_t = multiprocessing.Process(
            target=listener.twitter_listener_t,
            args=(my_db_handler, table_name))
        listener_t.start()

        _set_timer(hour=23, minute=29)

        print("Main: terminating listener")
        listener_t.terminate()
        listener_t.join()

        print("Main: analyzing data")
        analyzer.analyzer(my_db_handler, table_name)
        print()
        i += 1
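# _set_timer is not defined in this snippet; a minimal sketch of what such a
# helper might look like, assuming it simply blocks until the next time the
# local clock reads hour:minute (the name and behavior are assumptions, not
# the original implementation):
import datetime
import time

def _set_timer(hour, minute):
    """Sleep until the next occurrence of hour:minute local time."""
    now = datetime.datetime.now()
    target = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
    if target <= now:
        target += datetime.timedelta(days=1)  # already past today, wait for tomorrow
    time.sleep((target - now).total_seconds())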
def main():
    movie2 = []
    genres = []
    with open('movie.json') as infile:
        movie = json.load(infile)

    with open('moviesent.json', 'w') as f:
        for i in range(0, 1000):
            movie2.append({
                "title": movie[i]['title'],
                "ratings": movie[i]['vote_average'],
                "genres": movie[i]['genres'],
                "sentiment": analyzer(movie[i]['overview'])
            })
        json.dump(movie2, f, indent=2)

    with open('genres.json', 'w') as f1:
        for i in range(0, len(movie)):
            txt = movie[i]['genres']
            x = txt.split("|")
            for y in range(0, len(x)):
                if x[y] not in genres:
                    genres.append(x[y])
        json.dump(genres, f1, indent=2)
def __init__(self, addr, port):
    self.port = port
    self.address = addr
    self.server = socket(AF_INET, SOCK_STREAM)
    self.analyze = analyzer()
#import analyzer
#reload(analyzer)
from analyzer import analyzer

a = analyzer()
#a.collect_all_data()
#a.process_all_data()
#a.plot_all_data()
# Python 2 snippet (print statement, "except Exception, e" syntax).
HOST, PORT = '', 8888
listen_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
listen_socket.bind((HOST, PORT))
listen_socket.listen(1)
print 'Serving HTTP on port %s ...' % PORT
#print sys.path[0]
while True:
    client_connection, client_address = listen_socket.accept()
    request = client_connection.recv(1024)
    pt = re.compile(r'GET /(.*?) HTTP', re.S)
    match = pt.findall(request)
    http_response = '{"code":"-1", "msg":"Access Denied", "data":""}'
    if len(match) != 0 and match[0] == 'get':
        try:
            analyzerTool = analyzer('%s/data/%s.weather' % (sys.path[0], city_code))
            msg = analyzerTool.run()
            print msg
            if msg['code'] == -1:
                http_response = '{"code":"-2", "msg":"nothing to remind", "data":""}'
            else:
                http_response = '{"code":"0", "msg":"%s", "data":""}' % msg['msg']
        except Exception, e:
            print e
            http_response = '{"code":"-3", "msg":"Unknown Error", "data":""}'
    client_connection.sendall(http_response)
    client_connection.close()
def main():
    inf = None
    fd_log = None
    ana = None
    dbc = None
    remove_time = 1.2
    uncolor_time = 1.2

    # initialize
    parser, args = parse_args()
    if args.version:
        print(__version__)
        return
    elif args.canusb_dev:
        inf = interface.canusb(args.canusb_dev)
        if inf is None:
            print("interface initialize error %s" % (args.canusb_dev))
            return
        if args.logging_name:
            fd_log = open(args.logging_name, "w")
    elif args.pythoncan_dev:
        inf = interface.pythoncan(args.pythoncan_dev)
        if inf is None:
            print("interface initialize error %s" % (args.pythoncan_dev))
            return
        if args.logging_name:
            fd_log = open(args.logging_name, "w")
    elif args.usb2can_dev:
        inf = interface.usb2can()
        if inf is None:
            print("interface initialize error usb2can_dev")
            return
        if args.logging_name:
            fd_log = open(args.logging_name, "w")
    elif args.candump_log:
        inf = interface.candump(args.candump_log)
        if inf is None:
            print("interface initialize error %s" % (args.candump_log))
            return
    elif args.candump_log_nots:
        remove_time = 100
        inf = interface.candump_nots(args.candump_log_nots)
        if inf is None:
            print("interface initialize error %s" % (args.candump_log_nots))
            return
    elif args.vehiclespy_csv:
        remove_time = 0.02
        uncolor_time = 0.002
        inf = interface.vehiclespy(args.vehiclespy_csv)
        if inf is None:
            print("interface initialize error %s" % (args.vehiclespy_csv))
            return
    else:
        parser.print_help()
        return

    # diff mode
    if args.diff:
        main_diff(inf)
        return

    # initialize filter/find
    if args.filter_by_ids:
        filter_by_ids = [int(i, 16) for i in args.filter_by_ids.split(",")]
    else:
        filter_by_ids = None
    find_string_ids = []

    # initialize library
    scn = screen.screen()
    scn.color("", "w")
    if args.analyze:
        ana_level = 1
        ana = analyzer.analyzer(ana_level)
    if args.dbc:
        dbc = cantools.database.load_file(args.dbc)

    # main loop
    msgs_latest = {}
    view_line_num_latest = 0
    scn.clear()
    for msg in inf.recv():
        # recv new msg
        ts, dev_name, msg_id, msg_size, msg_dat = (
            msg.timestamp, msg.channel, msg.arbitration_id, msg.dlc,
            msg.data.hex())

        # filter by id
        if filter_by_ids and msg_id not in filter_by_ids:
            continue

        # logging
        if fd_log:
            fd_log.write("(%f) %s %03X#%s\x0a" % (ts, dev_name, msg_id, msg_dat))
            continue

        # analyze
        if ana:
            ana.analyze(msg)

        # find string
        if ana and args.find_string:
            if msg_id in find_string_ids:
                pass
            elif args.find_string in ana.get_msg_ascii(msg_id):
                find_string_ids.append(msg_id)
            else:
                continue

        # view msg
        if args.not_view_msg:
            # not view msg
            pass
        elif args.sniffer:
            # sniffer view mode (only latest msg each msg_id)
            scn.move(0, 0)
            view_line_num_latest = view_msg(msg, msgs_latest, scn, ana,
                                            view_line_num_latest, remove_time,
                                            uncolor_time)
        else:
            # flow view mode
            view_line_num_latest = view_msg(
                msg, {msg_id: msgs_latest.get(msg_id, msg)}, scn, ana,
                view_line_num_latest, remove_time, uncolor_time)
            if dbc:
                try:
                    for k, v in dbc.decode_message(msg.arbitration_id,
                                                   msg.data).items():
                        print(k, v)
                except:
                    pass

        # set latest msg
        msgs_latest[msg_id] = msg

    # view byte/bit range
    view_range(msgs_latest, ana, args.analyze_range)
    if fd_log:
        fd_log.close()
    inf.close()
    return
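# The dbc branch above uses cantools; a minimal standalone sketch of that
# decoding step, assuming a hypothetical example.dbc that defines a message
# with frame ID 0x123 (the file name, ID, and payload are illustrative, not
# from the original tool):
import cantools

db = cantools.database.load_file("example.dbc")
payload = bytes([0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08])
for name, value in db.decode_message(0x123, payload).items():
    print(name, value)  # signal name -> decoded physical value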
def main_diff(inf):
    # functions
    def view_range_one(diff_direction, msg_id, msg_dat, range_min, range_max,
                       range_bit):
        print("%s0x%03X %s" % (diff_direction, msg_id, msg_dat))
        for i in range(len(range_min)):
            if range_min[i] is not None:
                if range_min[i] != range_max[i]:
                    print(" " + " " + " " * i + "^^%02X-%02X %s %s" %
                          (range_min[i], range_max[i],
                           format((range_bit[i] >> 4) & 0x0f, "04b"),
                           format(range_bit[i] & 0x0f, "04b")))
            else:
                break

    def view_range_two(diff_direction, msg_id, msg_dat, range_min, range_max,
                       range_bit, range_min2, range_max2, range_bit2):
        did_view_title = False
        for i in range(max(len(range_min), len(range_min2))):
            # view diff
            if (len(range_min) > i and range_min[i] is not None) and (
                    len(range_min2) > i and range_min2[i] is not None):
                if range_bit[i] != range_bit2[i]:
                    if did_view_title == False:
                        print("%s0x%03X %s" % (diff_direction, msg_id, msg_dat))
                        did_view_title = True
                    print(" " + " " + " " * i + "^^%02X-%02X %s %s" %
                          (range_min[i], range_max[i],
                           format((range_bit[i] >> 4) & 0x0f, "04b"),
                           format(range_bit[i] & 0x0f, "04b")))
                    print(" " + " " + " " * i + "^^%02X-%02X %s %s" %
                          (range_min2[i], range_max2[i],
                           format((range_bit2[i] >> 4) & 0x0f, "04b"),
                           format(range_bit2[i] & 0x0f, "04b")))
            elif len(range_min) > i and range_min[i] is not None:
                if range_min[i] != range_max[i]:
                    if did_view_title == False:
                        print("%s0x%03X %s" % (diff_direction, msg_id, msg_dat))
                        did_view_title = True
                    print(">" + " " + " " * i + "^^%02X-%02X %s %s" %
                          (range_min[i], range_max[i],
                           format((range_bit[i] >> 4) & 0x0f, "04b"),
                           format(range_bit[i] & 0x0f, "04b")))
            elif len(range_min2) > i and range_min2[i] is not None:
                if range_min2[i] != range_max2[i]:
                    if did_view_title == False:
                        print("%s0x%03X %s" % (diff_direction, msg_id, msg_dat))
                        did_view_title = True
                    print("<" + " " + " " * i + "^^%02X-%02X %s %s" %
                          (range_min2[i], range_max2[i],
                           format((range_bit2[i] >> 4) & 0x0f, "04b"),
                           format(range_bit2[i] & 0x0f, "04b")))

    # values
    inf2 = None
    ana = None
    ana2 = None

    # initialize (second trace to diff against)
    parser, args = parse_args()
    if args.candump_log2:
        inf2 = interface.candump(args.candump_log2)
        if inf2 is None:
            print("interface initialize error %s" % (args.candump_log2))
            return
    elif args.candump_log_nots2:
        inf2 = interface.candump_nots(args.candump_log_nots2)
        if inf2 is None:
            print("interface initialize error %s" % (args.candump_log_nots2))
            return
    else:
        return

    # initialize library
    ana_level = 1
    ana = analyzer.analyzer(ana_level)
    ana2 = analyzer.analyzer(ana_level)

    # main_diff loop
    # read inf
    msgs_latest = {}
    view_line_num_latest = 0
    for msg in inf.recv():
        # recv new msg
        ts, dev_name, msg_id, msg_size, msg_dat = (
            msg.timestamp, msg.channel, msg.arbitration_id, msg.dlc,
            msg.data.hex())
        # analyze
        ana.analyze(msg)
        # set latest msg
        msgs_latest[msg_id] = msg

    # read inf2
    msgs_latest2 = {}
    view_line_num_latest2 = 0
    for msg2 in inf2.recv():
        # recv new msg
        ts, dev_name, msg_id, msg_size, msg_dat = (
            msg2.timestamp, msg2.channel, msg2.arbitration_id, msg2.dlc,
            msg2.data.hex())
        # analyze
        ana2.analyze(msg2)
        # set latest msg
        msgs_latest2[msg_id] = msg2

    # view byte/bit range
    msgs_latest, msgs_latest2 = sorted(msgs_latest.items()), sorted(
        msgs_latest2.items())
    v_msg, v_msg2 = None, None

    # init
    if len(msgs_latest) == 0 and len(msgs_latest2) == 0:
        return
    _, v_msg = msgs_latest.pop(0)
    _, v_msg2 = msgs_latest2.pop(0)

    while v_msg is not None and v_msg2 is not None:
        #print(v_msg, v_msg2)
        msg_id = v_msg.arbitration_id if v_msg is not None else None
        msg_id2 = v_msg2.arbitration_id if v_msg2 is not None else None
        if (msg_id2 is None and msg_id is not None) or msg_id < msg_id2:
            # view v_msg
            range_min, range_max, range_bit = ana.get_msg_range(msg_id)
            view_range_one(">", msg_id, v_msg.data.hex(), range_min, range_max,
                           range_bit)
            _, v_msg = msgs_latest.pop(0) if len(msgs_latest) > 0 else (None, None)
        elif (msg_id is None and msg_id2 is not None) or msg_id > msg_id2:
            # view v_msg2
            range_min2, range_max2, range_bit2 = ana2.get_msg_range(msg_id2)
            view_range_one("<", msg_id2, v_msg2.data.hex(), range_min2,
                           range_max2, range_bit2)
            _, v_msg2 = msgs_latest2.pop(0) if len(msgs_latest2) > 0 else (None,
                                                                           None)
        else:
            # view diff v_msg, v_msg2
            range_min, range_max, range_bit = ana.get_msg_range(msg_id)
            range_min2, range_max2, range_bit2 = ana2.get_msg_range(msg_id2)
            view_range_two(" ", msg_id, v_msg.data.hex(), range_min, range_max,
                           range_bit, range_min2, range_max2, range_bit2)
            _, v_msg = msgs_latest.pop(0) if len(msgs_latest) > 0 else (None, None)
            _, v_msg2 = msgs_latest2.pop(0) if len(msgs_latest2) > 0 else (None,
                                                                           None)

    inf.close()
    inf2.close()
    return
def component_analysis(self, cname):
    target_grade = self.db.get_target_grade(cname)
    grade_details = self.db.retrieveCourseGradeData(cname)
    analyze = analyzer(cname, target_grade, grade_details, self.db)
    self.ui.show_analysis(grade_details, analyze)
def project(line):
    """
    Given a line containing nobel, title, and year, crawl the paper itself,
    the papers that cite it, and the reference papers of those citing papers.
    If the data is unavailable because of the license, return False.
    """
    # Project initialize
    bot = wos_bot()
    anal = analyzer()
    nobel, title, year = line.strip().split("\t")
    logging.info("#" * 50)
    logging.info('START : %s', nobel)
    bot.search(title, year)
    logging.info('SEARCH : %s\t%s', title, year)
    papers = anal.list_papers(bot.data)
    if len(papers) != 1:
        logging.error('INVALID SEARCH : %s (%s) NUM :# %s', title, nobel,
                      str(len(papers)))
        bot.save("INVALID_SEARCH")
        return False
    bot.go_url(bot.get_url("paper", papers[0]))
    bot.save("ORIGIN_PAPER")
    droot = anal.extract(bot.data)
    curl = bot.get_url("cite")
    rurl = bot.get_url("ref")
    logging.debug("LEVEL 1 : %s", droot.title)
    if not curl:
        logging.error("NOT CITED : NO URL : %s (%s)", title, nobel)
        return False
    bot.go_url(curl)
    bot.save()
    papers = anal.list_papers(bot.data)
    if len(papers) == 0:
        logging.error("NOT CITED : FALSE CITED : %s (%s)", title, nobel)
        return False
    bot.go_url(bot.get_url("paper", papers[0]))
    curl = bot.br.geturl()

    # Check previous data
    logging.info("CHECK PREVIOUS DATA")
    pset, eset, nset, nmax = anal.Check_data(nobel)
    """
    pset : set of papers
    eset : set of edges
    nmax : maximum 2-C
    """
    bot.go_n(nmax)
    paper = anal.extract(bot.data)
    if paper in nset:
        # Continue previous result
        nc = nmax
        logging.info('RESTART : %s with last record # %s', nobel, str(nc))
        nurl = bot.get_url("next")
        if not nurl:
            logging.info('FINISHED %s (%s)', droot.title, nobel)
            logging.info("#" * 50)
            return True
        else:
            bot.go_url(nurl)
            # Check error on next paper page
            # If there is an error we should break
    else:
        # First start
        logging.info('INITIAL START : %s', nobel)
        nc = 0
        pset, eset = set(), set()
        Add_paper(droot, pset)
        Cite_or_ref(bot, anal, droot, 0, "R", rurl, pset, eset)
        write_data(nobel, 0, pset, eset, "w")
        bot.go_url(curl)
    del nset

    while True:
        nc += 1
        logging.info("LEVEL 2-C : %s / %s", str(nc), str(droot.ccnt))
        pset, eset = set(), set()
        paper1 = anal.extract(bot.data)
        if not paper1:
            logging.info("INVALID NEXT")
            break
        Add_pe(droot, paper1, "C", pset, eset)
        # Get links
        curl = bot.get_url("cite")
        rurl = bot.get_url("ref")
        nurl = bot.get_url("next")
        # Go to the list
        Cite_or_ref(bot, anal, paper1, 1, "R", rurl, pset, eset)
        Cite_or_ref(bot, anal, paper1, 3, "C", curl, pset, eset)
        write_data(nobel, nc, pset, eset)
        # Go to the next paper page
        if not nurl:
            logging.info("END OF LIST")
            break
        else:
            bot.go_url(nurl)
            # Check error on next paper page
            # If there is an error we should break

    logging.info('FINISHED %s (%s)', droot.title, nobel)
    logging.info("#" * 50)
    return True
import pdftotxt
from tokenizer import tokenizer
from analyzer import analyzer
from conclusion import conclusions
import os

pdftotxt.converter()
listd = os.listdir(r'666/')
count = 1
for f in listd:
    path = '666/' + f
    token = tokenizer()
    analyze = analyzer()
    con = conclusions()
    token.tokenize(path)
    listt = token.getwords()
    analyze.analyze(listt)
    s = analyze.getResult()
    con.conclude(s, count)
    con.writefile("conclusion.txt")
    count += 1