def init(args):
    """Bootstrap the project: check config, fetch NLTK data, build tables, scrape.

    The function returns early (after printing a hint) when the database
    configuration file for the selected mode is missing. Otherwise it
    downloads the NLTK packages, creates the database tables, constructs the
    classifier and starts scraping.

    Args:
        args: Options object. ``args.dev_mode`` selects ``db_dev.json``
            instead of ``db.json``; the whole object is also handed to
            ``DataStore``, ``Model`` and ``scrape_data``.
    """
    # Work out which configuration file this mode needs before doing
    # anything expensive.
    if args.dev_mode:
        config_missing = not Path("db_dev.json").exists()
        hint = "Create db_dev.json first"
    else:
        config_missing = not Path("db.json").exists()
        hint = "Create db.json first"
    if config_missing:
        print(hint)
        return

    print("Downloading nltk packages")
    for package in ("stopwords", "punkt", "wordnet", "averaged_perceptron_tagger"):
        nltk.download(package)

    print("Creating the database tables")
    store = DataStore(args)
    store.create_tables()
    store.close()

    print("Creating the classifier")
    Model(args)  # the instance itself is not kept

    print("Scraping the data")
    scrape_data(args)
def close(self):
    """Commit outstanding work and shut the database connection down.

    Calls ``commitStockBatch`` and ``commitBatch`` first (presumably these
    flush queued rows -- confirm against their definitions), then commits
    and closes ``self.conn``, clears the attribute, and finally delegates to
    ``DataStore.close``.
    """
    self.commitStockBatch()
    self.commitBatch()

    connection = self.conn
    connection.commit()
    connection.close()
    self.conn = None

    # NOTE(review): the base implementation runs after ``conn`` has been
    # cleared -- confirm that ``DataStore.close`` tolerates ``conn`` being None.
    DataStore.close(self)
def update_database(self, peers):
    """Merge a batch of advertised peers into the peer table.

    For every entry in ``peers`` the stored nodes are searched for the same
    ip/port pair. A stored node whose rating is below the baseline is raised
    to the baseline and written back; entries with no stored counterpart are
    collected and inserted as new ``PeerNode`` rows with the baseline rating.

    The whole operation runs under ``DataStore.mutex``. Both the mutex and
    the datastore connection are released even when a datastore call raises,
    so a failure here cannot block later users of the mutex.

    Args:
        peers: Iterable of mappings providing the keys ``"id"``, ``"ip"``
            and ``"port"``.
    """
    baseline_rating = 30

    DataStore.mutex.acquire()
    try:
        database_conn = DataStore()
        try:
            current_nodes = database_conn.get_all_peers()
            new_list = []
            for peer in peers:
                known = False
                for node in current_nodes:
                    if node.ip == peer["ip"] and node.port == peer["port"]:
                        known = True
                        # Only touch the database when the rating changes.
                        if node.rating < baseline_rating:
                            node.rating = baseline_rating
                            database_conn.update(node)
                if not known:
                    new_list.append(
                        PeerNode(0, peer["id"], peer["ip"], peer["port"], baseline_rating)
                    )
            database_conn.insert_peers(new_list)
        finally:
            database_conn.close()
    finally:
        DataStore.mutex.release()
class TaskHandler:
    """Fan a recommendation task out to peers over UDP and merge the answers.

    Constructing a handler opens a UDP socket and starts a worker thread
    running :meth:`start_process`. :meth:`launch_task` hands a task to that
    thread and, on the caller's thread, computes a local recommendation with
    ``self.predictor``. Both the local and the remote results are fed to the
    task through ``consume``; once the task reports ``finished()`` the
    combined answer is sent back through the task's own socket.
    """

    # The peer list is re-read from the datastore whenever the socket table
    # holds fewer than this many entries.
    PEER_REPICK = 5

    def __init__(self, remote_ip, remote_port):
        """Open the UDP socket and start the broadcast worker thread.

        Args:
            remote_ip: Address that is excluded when peers are picked
                (together with ``remote_port``).
            remote_port: Port that is excluded when peers are picked.
        """
        self.udp_sender = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        self.socket_table = {}  # peer id -> (ip, port) tuple used for sendto
        self.remote_ip = remote_ip
        self.remote_port = remote_port
        # Defined up front so the worker thread and check_and_done never hit
        # a missing attribute.
        self.peers = []
        self.current_task = None
        self.task_sema = threading.Semaphore(0)
        self.task_mutex = threading.Lock()
        self.predictor = MLEngine(D2LogisticRegression())
        self.broadcast_thread = threading.Thread(target=self.start_process, args=())
        self.broadcast_thread.start()

    def initialize_connections(self):
        """Refill the socket table from the datastore when it runs low.

        Does nothing while the table already holds ``PEER_REPICK`` or more
        entries. Otherwise the ten highest-rated peers are loaded and every
        peer not yet in the table -- and not equal to this handler's own
        ``remote_ip``/``remote_port`` -- is added. The datastore connection
        and ``DataStore.mutex`` are released even if a datastore call fails.
        """
        if len(self.socket_table) >= TaskHandler.PEER_REPICK:
            return
        if DBGMODE:
            print("initialize_connections [start]")
        DataStore.mutex.acquire()
        try:
            self.datastore = DataStore()
            try:
                self.peers = self.datastore.get_highest_rating(10)
                for peer in self.peers:
                    is_self = peer.ip == self.remote_ip and peer.port == self.remote_port
                    if peer.id not in self.socket_table and not is_self:
                        self.socket_table[peer.id] = (peer.ip, int(peer.port))
            finally:
                self.datastore.close()
        finally:
            DataStore.mutex.release()

    def join(self):
        """Block until the broadcast worker thread terminates."""
        self.broadcast_thread.join()

    def launch_task(self, task):
        """Start processing ``task`` locally and on the peers.

        The task is published as ``self.current_task`` and the worker thread
        is woken through the semaphore. The local recommendation is computed
        on the calling thread; ``task.camp == 0`` selects the
        ``(radiant, dire)`` argument order, anything else ``(dire, radiant)``.

        Args:
            task: Object exposing ``camp``, ``radiant``, ``dire``,
                ``consume`` and the attributes used by
                :meth:`check_and_done`.
        """
        if DBGMODE:
            print("launch_task [start]")
        self.current_task = task
        self.task_sema.release()
        if task.camp == 0:
            local_result = self.predictor.recommend(task.radiant, task.dire)
        else:
            local_result = self.predictor.recommend(task.dire, task.radiant)
        self.current_task.consume(local_result)
        self.check_and_done()
        if DBGMODE:
            print("launch_task [end]")

    def check_and_done(self):
        """Send the final answer if the current task has finished.

        Runs under ``task_mutex`` because it is called from both the caller
        thread and the worker thread. The lock is taken with ``with`` so a
        failing ``sendto`` cannot leave it held and block every later task.
        """
        if DBGMODE:
            print("check_and_done [start]")
        with self.task_mutex:
            if self.current_task is not None and self.current_task.finished():
                result = self.process_responses(self.current_task.result)
                json_str = json.dumps(result)
                self.current_task.socket.sendto(
                    json_str.encode("utf-8"), self.current_task.addr
                )
                print("response message:" + json_str)
                self.current_task = None
        if DBGMODE:
            print("check_and_done [end]")

    def process_responses(self, responses):
        """Convert raw response pairs into the reply payload.

        Args:
            responses: Sequence of indexable items where index 0 is the rate
                and index 1 is the hero id.

        Returns:
            ``{"candidates": [{"heroId": ..., "rate": ...}, ...]}`` in the
            same order as ``responses``.
        """
        if DBGMODE:
            print("process_responses [start]")
        print("process_response: \t" + json.dumps(responses))
        result = {
            "candidates": [
                {"heroId": response[1], "rate": response[0]} for response in responses
            ]
        }
        if DBGMODE:
            print("process_responses [end]")
        return result

    def start_process(self):
        """Worker loop: wait for a task, query the peers, merge their replies.

        Runs forever on ``broadcast_thread``. Each iteration refreshes the
        peer table, blocks until :meth:`launch_task` releases the semaphore,
        broadcasts the request, collects replies until every contacted peer
        answered or the time budget is spent, drops silent peers, and feeds
        the merged candidates to the current task.
        """
        while True:
            self.initialize_connections()
            if DBGMODE:
                print("pre start process task.")
            self.task_sema.acquire()
            if DBGMODE:
                print("real process task.")

            self._broadcast_request()
            result, active_entry = self._collect_replies()
            self._drop_dead_peers(active_entry)

            print("peers size ", len(self.peers))
            print("remote resulting message {}".format(json.dumps(result)))
            self.current_task.consume(result)
            self.check_and_done()

    def _broadcast_request(self):
        """Send the current task's request to every peer in the socket table."""
        # Serialise once; the payload is identical for every peer.
        payload = json.dumps(self.current_task.request).encode("utf-8")
        for target in self.socket_table.values():
            print(target)
            self.udp_sender.sendto(payload, target)

    def _collect_replies(self):
        """Receive peer replies until all answered or the time budget is spent.

        The budget is ``RETRY_TIMES * TIME_BOUND`` seconds measured against a
        fixed deadline, so time already spent is subtracted exactly once.
        Nothing is awaited when no peer was contacted. Collection stops at
        the first socket error, timeout or malformed reply.

        Returns:
            ``(result, active_entry)``: the concatenated ``"candidates"`` of
            all replies and one ``{"ip", "port"}`` dict per reply.
        """
        result = []
        active_entry = []
        expected = len(self.socket_table)
        remain_time = RETRY_TIMES * TIME_BOUND
        deadline = time.monotonic() + remain_time
        count = 0
        print("remain tiem is:" + str(remain_time))
        while count < expected and remain_time > 0:
            try:
                print("[start] recvfrom remote")
                self.udp_sender.settimeout(remain_time)
                data, _ = self.udp_sender.recvfrom(4096)
                message = data.decode("utf-8")
                print("[middle] recvfrom:" + message)
                element = json.loads(message)
                active_entry.append({"ip": element["ip"], "port": element["port"]})
                result.extend(element["candidates"])
            except (OSError, ValueError, KeyError, TypeError) as exc:
                # OSError covers socket timeouts and network failures; the
                # others cover undecodable or malformed replies. Unlike a bare
                # ``except`` this lets KeyboardInterrupt/SystemExit through.
                if DBGMODE:
                    print("recvfrom aborted: {}".format(exc))
                break
            count += 1
            remain_time = deadline - time.monotonic()
        return result, active_entry

    def _drop_dead_peers(self, active_entry):
        """Forget every known peer that did not appear in ``active_entry``.

        Args:
            active_entry: List of ``{"ip", "port"}`` dicts, one per reply
                received in this round.
        """
        # NOTE(review): initialize_connections converts peer.port with int()
        # before sending, which suggests peer.port may not be an int; confirm
        # that it compares equal to the "port" value peers put in replies.
        dead_list = [
            peer.id
            for peer in self.peers
            if not any(
                peer.ip == entry["ip"] and peer.port == entry["port"]
                for entry in active_entry
            )
        ]
        for pid in dead_list:
            print("close pid: " + str(pid))
            self.socket_table.pop(pid, None)
        self.peers = [peer for peer in self.peers if peer.id not in dead_list]