def timeout(self, time, packet):
    """
    A timeout occurred at the given time on the given packet. Resend if
    necessary.

    :param time: Time to resend the packet
    :type time: int
    :param packet: Packet which timed out
    :type packet: Packet
    """
    # We already received an Ack for it
    if packet.id not in self.awaiting_ack:
        return
    else:
        # Otherwise, remove it so that it will be added again
        del self.awaiting_ack[packet.id]

    if not isinstance(packet, FlowPacket):
        # If an ACK packet is dropped, don't worry about it, it'll be sent
        # again later
        return

    flow_id = packet.flow_id

    if self.current_request_num is not None and \
            packet.sequence_number < self.current_request_num:
        # Packet was already received
        return

    self.congestion_control.handle_timeout(packet, time)

    # Resend
    Logger.info(time, "Packet %s was dropped, resending" % (packet.id))
    self.queue.add(packet)
    self.send_packets(time, flow_id)
def handle_receive(self, packet, time):
    if isinstance(packet, AckPacket):
        Rn = packet.request_number
        self.last_n_req_nums.append(Rn)
        if len(self.last_n_req_nums) > TCPReno.MAX_DUPLICATES:
            self.last_n_req_nums.pop(0)

        Sn, Sb, Sm = self.host.sequence_nums
        cwnd = self.host.cwnd

        if self.last_drop is None or \
                time - self.last_drop > TCPReno.TIMEOUT_TOLERANCE:
            if len(self.last_n_req_nums) == TCPReno.MAX_DUPLICATES and \
                    all(num == Rn for num in self.last_n_req_nums):
                # If we've had duplicate ACKs, then enter fast retransmit.
                self.ssthresh = max(self.host.cwnd / 2, TCPReno.INITIAL_CWND)
                self.set_window_size(time, self.ssthresh)
                Logger.warning(time, "Duplicate ACKs received for flow %s."
                                     % self.host.flow[0])
                self.last_drop = time

        if self.ss:
            self.set_window_size(time, cwnd + 1)
            if self.host.cwnd >= self.ssthresh:
                self.ss = False
                Logger.info(time, "SS phase over for Flow %s. CA started."
                                  % (self.host.flow[0]))
        elif Rn > Sb:
            # If we are in Congestion Avoidance mode, we wait for an RTT to
            # increase the window size, rather than doing it on ACK.
            self.set_window_size(time, cwnd + 1. / cwnd)
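# --- Illustration (not from the original codebase) ---------------------------
# A minimal, self-contained sketch of the window dynamics the handler above
# implements: slow start grows cwnd by 1 per ACK (doubling per RTT), while
# congestion avoidance grows it by 1/cwnd per ACK (~1 per RTT). All names and
# starting values here are hypothetical.
def reno_window_trace(num_acks, cwnd=1.0, ssthresh=8.0):
    trace = []
    slow_start = True
    for _ in range(num_acks):
        if slow_start:
            cwnd += 1
            if cwnd >= ssthresh:
                slow_start = False  # hand over to congestion avoidance
        else:
            cwnd += 1.0 / cwnd
        trace.append(round(cwnd, 2))
    return trace


# [2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 8.12, 8.25, 8.37]
print(reno_window_trace(10))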
def set_window_size(self, time, value):
    flow_id = self.flow[0]
    Logger.info(time, "Window size changed from %0.2f -> %0.2f for flow %s"
                      % (self.cwnd, value, flow_id))
    self.cwnd = value
    self.dispatch(WindowSizeEvent(time, flow_id, self.cwnd))
def receive(self, packet, time):
    """
    Handles receipt of a packet.

    Args:
        packet (Packet): The packet.
        time (float):    Time the packet was received
    """
    Logger.info(time, "%s received packet %s" % (self, packet))
    # Get the appropriate routing table
    routing_table = self.get_routing_table()
    # Update the current routing table with the routing packet
    if isinstance(packet, StaticRoutingPacket):
        self.handle_routing_packet(packet, dynamic=False)
    elif isinstance(packet, DynamicRoutingPacket):
        self.handle_routing_packet(packet, dynamic=True)
    # Route the packet
    elif isinstance(packet, (AckPacket, Packet)):
        if not routing_table:
            Logger.warning(time, "%s dropped packet %s, no routing table. "
                                 "Creating one now." % (self, packet))
            self.create_routing_table(self.dynamicEnabled)
            return
        elif packet.dest.id not in routing_table:
            # TODO: should we keep a packet queue for packets w/o dest.?
            Logger.warning(time, "%s dropped packet %s, dest. not in "
                                 "routing table." % (self, packet))
            return
        dest_link = routing_table[packet.dest.id].link
        self.send(packet, dest_link, time)
    else:
        raise UnhandledPacketType
def init_driver(logger: Logger) -> webdriver:
    logger.info("Initializing Chromedriver...")
    # Create a display
    if 'MAC' not in os.environ:
        display = Display(visible=0, size=(1200, 1200))
        display.start()

    # Create a new Chrome session
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-dev-shm-usage')
    prefs = {"profile.managed_default_content_settings.images": 2}
    chrome_options.add_experimental_option("prefs", prefs)

    driver: webdriver = None
    if 'DRIVER' not in os.environ:
        chrome_options.binary_location = '/usr/bin/chromium-browser'
        driver = webdriver.Chrome(
            chrome_options=chrome_options,
            executable_path='/usr/lib/chromium-browser/chromedriver')
    else:
        driver = webdriver.Chrome(chrome_options=chrome_options)

    driver.implicitly_wait(30)
    driver.maximize_window()
    logger.info("Chromedriver started!")
    return driver
def _create_logger(self, tfrec_config, model_config):
    # create logger
    data_name = tfrec_config["data_name"]
    model_name = tfrec_config["model"]
    log_dir = os.path.join("./log", data_name, model_name)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    logger_name = '_'.join(["{}={}".format(arg, value)
                            for arg, value in model_config.items()
                            if len(value) < 20])
    special_char = {'/', '\\', '\"', ':', '*', '?', '<', '>', '|', '\t'}
    logger_name = [c if c not in special_char else '_' for c in logger_name]
    logger_name = ''.join(logger_name)
    timestamp = time.time()
    # model name, param, timestamp
    logger_name = "%s_log_%s_%d.log" % (model_name, logger_name, timestamp)
    logger_name = os.path.join(log_dir, logger_name)
    logger = Logger(logger_name)

    # write configuration into log file
    info = '\n'.join(["{}={}".format(arg, value)
                      for arg, value in tfrec_config.items()])
    logger.info("\nTFRec information:\n%s " % info)
    logger.info("\n")
    logger.info("Recommender:%s" % model_name)
    logger.info("Dataset name:\t%s" % data_name)
    argument = '\n'.join(["{}={}".format(arg, value)
                          for arg, value in model_config.items()])
    logger.info("\nHyperparameters:\n%s " % argument)
    return logger
def gen_ngrams(self):
    n = self.max_sep
    min_count = self.min_count
    ngrams = defaultdict(int)
    pre_suffix = defaultdict(dict)
    after_suffix = defaultdict(dict)
    t = self.data
    Logger.info("start generating ngrams")
    for i in tqdm(range(len(t))):
        for j in range(1, n + 1):
            if i + j <= len(t):
                gram = self.encode_str(t[i:i + j])
                ngrams[gram] += 1
                # count the character immediately before the n-gram
                if i - 1 >= 0:
                    pre_suffix[gram][t[i - 1]] = \
                        pre_suffix[gram].get(t[i - 1], 0) + 1
                # count the character immediately after the n-gram
                if j + i + 1 <= len(t):
                    after_suffix[gram][t[j + i]] = \
                        after_suffix[gram].get(t[j + i], 0) + 1
    self.ngrams = dict()
    self.total = 0
    for i, j in ngrams.items():
        if len(i) == 1:
            self.total += j
        if j >= min_count:
            self.ngrams[i] = j
    self.pre_suffix = pre_suffix
    self.after_suffix = after_suffix
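# --- Illustration (not from the original codebase) ---------------------------
# A toy run of the counting scheme above, assuming encode_str is the identity
# function, for the text "abcab" with n = 2: ngrams holds raw counts, pre/post
# hold the left/right neighbour-character counts per n-gram.
from collections import defaultdict

t, n = "abcab", 2
ngrams = defaultdict(int)
pre, post = defaultdict(dict), defaultdict(dict)
for i in range(len(t)):
    for j in range(1, n + 1):
        if i + j <= len(t):
            g = t[i:i + j]
            ngrams[g] += 1
            if i - 1 >= 0:
                pre[g][t[i - 1]] = pre[g].get(t[i - 1], 0) + 1
            if i + j + 1 <= len(t):
                post[g][t[i + j]] = post[g].get(t[i + j], 0) + 1

print(dict(ngrams))  # {'a': 2, 'ab': 2, 'b': 2, 'bc': 1, 'c': 1, 'ca': 1}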
class StartUpLems():
    """
    Start-up of the local energy management system
    """

    def __init__(self, socket):
        self.t0 = time.time()
        self.connection_time_max = 1
        self.logger = Logger("Local_ems_start_up")
        # 1 = work as a universal EMS; 2 = work as a local EMS.
        self.Operation_mode = default_operation_mode["UEMS"]
        self.socket = socket

    def run(self):
        """
        Communication check with the neighbouring MGs
        :return: Operation mode
        """
        self.socket.send(b"ConnectionRequest")
        connection_thread = ConnectionThread(self.socket)
        connection_thread.daemon = True
        connection_thread.start()
        connection_thread.join(self.connection_time_max)
        try:
            if connection_thread.message == b"Start!":
                self.logger.info(
                    "The connection between the local EMS and universal EMS "
                    "is established!")
        except AttributeError:
            # No reply arrived before the join() timeout
            self.logger.error("Connection timed out!")
            self.logger.warning("EMS now works as a local EMS!")
            # Change the working mode of the energy management system
            self.Operation_mode = default_operation_mode["LEMS"]
        return self.Operation_mode

    def information_send(self, microgrid, static_info):
        """
        Send information collected from the local energy management system
        :param microgrid: static information model of the energy management system
        :param static_info: information model of the static information
        :return: the status of the information sent
        """
        static_info_formulated = static_information_formulation(
            microgrid, static_info)
        info_send = InformationSendReceive(self.socket, static_info_formulated)
        static_info_status = info_send.send()
        return static_info_status

    def database_start_up(self):
        """
        Create a database session for the local energy management system
        :return: Session class
        """
        Session = db_session(scheduling_plan_local)
        return Session
def _fit(estimator, x_train):
    Logger.info("Training k-means clustering model started")
    start_time = time.time()
    model = estimator.fit(x_train)
    Logger.info("Training k-means clustering model completed (Elapsed time: "
                + str(time.time() - start_time) + " seconds)")
    return model
def save_results(self):
    Logger.info("Saving results ...")
    with open(self.predictions_file, mode='w', encoding='utf-8') as csv_file:
        fieldnames = ['bio', 'label', 'predicted_label', 'probability']
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        for p in self.predictions:
            writer.writerow(p)
def execute(self):
    Logger.info(self.time, "Packet %s sent over link %s to %s"
                           % (self.packet, self.link.id, self.destination))
    transmission_delay = self.link.transmission_delay(self.packet)
    recv_time = self.time + transmission_delay + self.link.delay
    direction = self.link.get_direction_by_node(self.destination)
    self.link.packets_on_link[direction].append(self.packet)
    self.link.dispatch(
        PacketReceivedEvent(recv_time, self.packet, self.destination,
                            self.link))
def my_predict(self, bio: str):
    Logger.info("Predicting using model ...")
    prediction = self.model.predict(bio)
    if prediction[0][0] == '__label__{}'.format(self.Label.APPROPRIATE.value):
        predicted_label = self.Label.APPROPRIATE.value
    else:
        predicted_label = self.Label.INAPPROPRIATE.value
    formatted_prediction = {
        'predicted_label': predicted_label,
        'probability': prediction[1][0]
    }
    return formatted_prediction
def join_smalltv(self, obj):
    if self.num == 200:
        print('[Skip Lottery][' + obj['title'] + '] lottery skipped (roomid='
              + str(obj['roomid']) + ',raffleId=' + str(obj['raffleId']) + ')')
        return
    self.num += 1
    res = API.Gift.join_smalltv(self.s, obj['roomid'], obj['raffleId'],
                                self.Info['csrf_token'], self.Info['visit_id'])
    code = res['code']
    if code == 0:
        Logger.info('[Auto Lottery][' + obj['title'] + '] joined the lottery '
                    '(roomid=' + str(obj['roomid']) + ',raffleId='
                    + str(obj['raffleId']) + ')')
    elif code == 400:
        Logger.warning('[Auto Lottery][Gift Lottery] Access denied; your '
                       'account may have been banned. Stopped.')
    elif code == 65531:
        Logger.info('[Auto Lottery][Gift Lottery] already joined the lottery '
                    '(roomid=' + str(obj['roomid']) + ',raffleId='
                    + str(obj['raffleId']) + ')')
    else:
        Logger.error('[Auto Lottery][Gift Lottery] failed to join the lottery '
                     '(roomid=' + str(obj['roomid']) + ',raffleId='
                     + str(obj['raffleId']) + ') ' + res['msg'])
def train_supervised(self, auto=False, save=False, duration=120):
    Logger.info("Training model ...")
    if auto:
        self.model = fasttext.train_supervised(
            input=self.bio_train_path,
            autotuneValidationFile=self.bio_test_path,
            autotuneDuration=duration)
    else:
        self.model = fasttext.train_supervised(input=self.bio_train_path)
    if save:
        self.model.save_model(self.model_path)
def send_event(self, event: Event, ctx: DokBotContext = None):
    if ctx:
        # We don't actually need to send events if we are already within the bot
        process(ctx, event)
    else:
        if self.bot_queue is None:
            print("Cannot send an outgoing message.")
            return
        Log.info(f"Sending event {event}")
        self.bot_queue.send_message(
            MessageBody=str({"event": event.to_message()}))
def handle_receive(self, packet, time):
    if isinstance(packet, AckPacket):
        Rn = packet.request_number
        Sn, Sb, Sm = self.host.sequence_nums
        cwnd = self.host.cwnd
        if self.ss:
            self.set_window_size(time, cwnd + 1)
            if self.host.cwnd >= self.ssthresh:
                self.ss = False
                Logger.info(time, "SS phase over for Flow %s. CA started."
                                  % (self.host.flow[0]))
        elif Rn > Sb:
            # If we are in Congestion Avoidance mode, we wait for an RTT to
            # increase the window size, rather than doing it on ACK.
            self.set_window_size(time, cwnd + 1. / cwnd)
def __generate_training_and_test_series(self):
    Logger.info("Generating training and test datasets ...")
    self.__preprocess_bios()
    concatenated = PandasUtils.concat_series(
        [self.appropriate_bios, self.inappropriate_bios])
    shuffled = PandasUtils.shuffle_series(concatenated)
    self.training_series = shuffled.head(self.number_of_training_records)
    self.test_series = shuffled.tail(self.number_of_test_records)
    FileUtils.write_series2file(self.bio_train_path, self.training_series)
    FileUtils.write_series2file(self.bio_test_path, self.test_series)
    self.test_list = self.test_series.to_list()
    self.cleaned_test_list = self.__remove_labels(
        self.test_series,
        ['__label__{}'.format(l.value) for l in self.Label]).tolist()
def predict_all(self):
    Logger.info("Predicting all using model ...")
    labels = self.__extract_assigned_labels_from_test_series()
    for i in range(self.number_of_test_records):
        prediction = self.model.predict(self.cleaned_test_list[i])
        if prediction[0][0] == '__label__{}'.format(
                self.Label.APPROPRIATE.value):
            predicted_label = self.Label.APPROPRIATE.value
        else:
            predicted_label = self.Label.INAPPROPRIATE.value
        self.predictions.append({
            'bio': self.cleaned_test_list[i],
            'label': labels[i],
            'predicted_label': predicted_label,
            'probability': prediction[1][0]
        })
async def listen(self, event_handler_factory: EventHandlerFactory):
    if self.bot_queue is None:
        print("Cannot listen to incoming messages.")
        return
    messages = self.bot_queue.receive_messages(
        MaxNumberOfMessages=10, WaitTimeSeconds=QUEUE_POLLING_PERIOD_SECS)
    for message in messages:
        event = Event.from_message(eval(message.body)["event"])
        Log.info(f"Received event {event}")
        event_handler = await event_handler_factory.create_event_handler(event)
        await process_now(event_handler, event)
        if message:
            message.delete()
def send(self, packet, link, time):
    """
    Sends the given packet along the link at the specified time

    :param packet: Packet to send
    :type packet: Packet
    :param link: Link to send the packet through
    :type link: Link
    :param time: Time to send the packet
    :type time: float
    :return: Nothing
    :rtype: None
    """
    assert len(self.links) > 0, "Can't send if links aren't connected"
    Logger.info(time, "%s sent packet %s over link %s."
                      % (self, packet, link.id))
    # Send the packet
    self.dispatch(PacketSentToLinkEvent(time, self, packet, link))
def support_filter(self, min_proba={2: 5, 3: 5, 4: 5}):
    ngrams = self.ngrams
    total = self.total
    self.ngrams = {}
    Logger.info("start computing support scores")
    for word, count in tqdm(ngrams.items()):
        is_keep = False
        s = word
        if len(self.decode_str(s)) >= 2:
            # For every split point, compare the n-gram count against the
            # product of the counts of its two halves (a PMI-style score).
            numerator = []
            for index in range(len(self.decode_str(s)) - 1):
                numerator.append(
                    ngrams[self.encode_str(self.decode_str(s)[:index + 1])]
                    * ngrams[self.encode_str(self.decode_str(s)[index + 1:])])
            score = np.min(total * ngrams[s] / np.array(numerator))
            if score > min_proba[len(self.decode_str(s))]:
                is_keep = True
        if is_keep:
            self.ngrams[word] = count
def put_transactions(transactions):
    log.info("Putting {} transactions to Kinesis stream".format(
        len(transactions)))
    if len(transactions) == 0:
        return
    records = []
    for t in transactions:
        records.append({
            'Data': json.dumps(t.getItem()),
            'PartitionKey': t.getField('UUID')
        })
    records_block = 100
    start_pos = 0
    while start_pos < len(records):
        # Slicing past the end of the list is safe; clamping to
        # len(records) - 1 here would silently drop the last record.
        end_pos = min(start_pos + records_block, len(records))
        kinesis_client.put_records(StreamName=stream_name,
                                   Records=records[start_pos:end_pos])
        start_pos += records_block
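# --- Illustration (not from the original codebase) ---------------------------
# The batching pattern used above, as a tiny standalone helper: stepping the
# start index by the block size and letting the slice clamp at the end puts
# every record in exactly one batch (the helper name is hypothetical).
def chunks(items, size=100):
    for start in range(0, len(items), size):
        yield items[start:start + size]


batches = list(chunks(list(range(250))))
print([len(b) for b in batches])  # [100, 100, 50] -- nothing dropped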
def split(self):
    if self.file_format.lower() == "uirt":
        columns = ["user", "item", "rating", "time"]
        by_time = self.by_time is not False
    elif self.file_format.lower() == "uir":
        columns = ["user", "item", "rating"]
        by_time = False
    else:
        raise ValueError("There is no data format '%s'" % self.file_format)

    all_data = load_data(self.filename, sep=self.sep, columns=columns)
    filtered_data = filter_data(all_data,
                                user_min=self.user_min,
                                item_min=self.item_min)
    remapped_data, user2id, item2id = remap_id(filtered_data)

    if self.spliter == "ratio":
        train_data, test_data = split_by_ratio(remapped_data,
                                               ratio=self.ratio,
                                               by_time=by_time)
    elif self.spliter == "loo":
        train_data, test_data = split_by_loo(remapped_data, by_time=by_time)
    else:
        raise ValueError("There is no splitter '%s'" % self.spliter)

    np.savetxt(self.filename + ".train", train_data, fmt="%s", delimiter="\t")
    np.savetxt(self.filename + ".test", test_data, fmt="%s", delimiter="\t")

    user_id = [[str(user), str(id)] for user, id in user2id.items()]
    np.savetxt(self.filename + ".user2id", user_id, fmt="%s", delimiter="\t")
    item_id = [[str(item), str(id)] for item, id in item2id.items()]
    np.savetxt(self.filename + ".item2id", item_id, fmt="%s", delimiter="\t")

    user_num = len(np.unique(remapped_data["user"]))
    item_num = len(np.unique(remapped_data["item"]))
    rating_num = len(remapped_data["item"])
    sparsity = 1 - 1.0 * rating_num / (user_num * item_num)

    logger = Logger(self.filename + ".info")
    # TODO add parameters to log
    logger.info("The number of users: %d" % user_num)
    logger.info("The number of items: %d" % item_num)
    logger.info("The number of ratings: %d" % rating_num)
    logger.info("The sparsity of the dataset: %f%%" % (sparsity * 100))
def inf_filter(self):
    min_inf = self.min_inf
    pre_suffix = self.pre_suffix
    after_suffix = self.after_suffix
    ngrams = self.ngrams
    self.ngrams = {}
    Logger.info("start computing inf scores")
    for word, count in tqdm(ngrams.items()):
        is_keep = False
        i = word
        # Entropy of the distribution over left-neighbour characters
        pre_array = np.array(list(pre_suffix[i].values()), dtype=np.double)
        prob_array = pre_array / np.sum(pre_array)
        pre_info = -prob_array.dot(np.log(prob_array.transpose()))
        # Entropy of the distribution over right-neighbour characters
        after_array = np.array(list(after_suffix[i].values()), dtype=np.double)
        prob_array = after_array / np.sum(after_array)
        after_info = -prob_array.dot(np.log(prob_array.transpose()))
        if pre_info > min_inf and after_info > min_inf:
            is_keep = True
        if is_keep:
            self.ngrams[word] = count
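# --- Illustration (not from the original codebase) ---------------------------
# A minimal numpy sketch of the boundary-entropy test applied above: an n-gram
# is kept only when both its left- and right-neighbour distributions have high
# entropy, i.e. the candidate word occurs in varied contexts.
import numpy as np

def entropy(counts):
    p = np.asarray(counts, dtype=np.double)
    p = p / p.sum()
    return float(-p.dot(np.log(p)))

print(entropy([5, 5, 5]))  # varied neighbours -> ln(3) ~ 1.10
print(entropy([15]))       # always the same neighbour -> 0.0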
def run():
    # Operation process for UEMS
    logger = Logger('Universal_ems_main')  # The logger system has been started
    db_str = db_configuration.universal_database["db_str"]  # Database format
    # Create engine for the universal energy management system databases
    engine = create_engine(db_str, echo=False)
    Session = sessionmaker(bind=engine)  # Create session factory for the target database
    session_uems = Session()  # Create session for the universal energy management system
    # IP = "10.25.196.56"
    IP = "*"
    # Start the information connection
    context = zmq.Context()
    socket = context.socket(zmq.REP)
    socket.bind("tcp://" + IP + ":5555")
    socket_upload = context.socket(zmq.REP)  # Upload information channel for local EMS
    socket_upload.bind("tcp://" + IP + ":5556")
    socket_download = context.socket(zmq.REQ)  # Download information channel for local EMS
    socket_download.bind("tcp://" + IP + ":5557")

    initialize = Main(socket)
    universal_models = initialize.universal_models
    local_models = initialize.local_models

    # Start the input information
    info_ed = economic_dispatch_info.local_sources()  # Dynamic information for economic dispatch
    info_uc = economic_dispatch_info.local_sources()  # Dynamic information for unit commitment
    info_opf = opf_model.informaiton_exchange()  # Optimal power flow modelling

    # Generate the different processes. The first two schedulers run in the
    # background (a blocking scheduler here would prevent the later ones from
    # ever starting); only the last one blocks the main thread.
    logger.info("The short term process in UEMS starts!")
    sched_short_term = BackgroundScheduler()  # The scheduler for the optimal power flow
    sched_short_term.add_job(
        short_term_operation.short_term_operation_uems, 'cron',
        args=(universal_models, local_models, socket_upload, socket_download,
              info_opf, session_uems),
        minute='0-59', second='1')  # Triggered every minute, starting at **:01
    sched_short_term.start()

    logger.info("The middle term process in UEMS starts!")
    sched_middle_term = BackgroundScheduler()  # The scheduler for economic dispatch
    sched_middle_term.add_job(
        middle_term_operation.middle_term_operation_uems, 'cron',
        args=(universal_models, local_models, socket_upload, socket_download,
              info_ed, session_uems),
        minute='*/5', second='1')  # Triggered every 5 minutes
    sched_middle_term.start()

    short_term_operation.short_term_operation_uems(
        universal_models, local_models, socket_upload, socket_download,
        info_opf, session_uems)

    logger.info("The long term process in UEMS starts!")
    sched_long_term = BlockingScheduler()  # The scheduler for unit commitment
    sched_long_term.add_job(
        long_term_operation.long_term_operation_uems, 'cron',
        args=(universal_models, local_models, socket_upload, socket_download,
              info_uc, session_uems),
        minute='*/30', second='1')  # Triggered every half hour
    sched_long_term.start()
def __init__(self,
             number_of_appropriate_bios_records: int = 2000,
             number_of_training_records: int = 2115,
             number_of_test_records: int = 0):
    self.number_of_appropriate_bios_records = number_of_appropriate_bios_records
    self.inappropriate_bios = PandasUtils.select_series(
        FileUtils.read_excel_file(self.inappropriate_bios_path),
        self.ColNames.BIO.value)
    self.number_of_inappropriate_bios_records = len(
        self.inappropriate_bios.index)
    self.number_of_all_bios_records = (
        self.number_of_appropriate_bios_records
        + self.number_of_inappropriate_bios_records)
    self.number_of_training_records = number_of_training_records
    # Cap the number of test records at whatever is left after training
    remaining_records = (self.number_of_all_bios_records
                         - self.number_of_training_records)
    self.number_of_test_records = (
        min(remaining_records, number_of_test_records)
        if number_of_test_records else remaining_records)
    self.bios = FileUtils.read_excel_file(self.bios_path)
    self.appropriate_bios = PandasUtils.select_series(
        self.bios.head(self.number_of_appropriate_bios_records),
        self.ColNames.BIO.value)
    self.__generate_training_and_test_series()
    self.model = None
    self.predictions = []
    Logger.info("Number of appropriate labeled bios records is: {}".format(
        self.number_of_appropriate_bios_records))
    Logger.info("Number of inappropriate labeled bios records is: {}".format(
        self.number_of_inappropriate_bios_records))
    Logger.info("Number of training records is: {}".format(
        self.number_of_training_records))
    Logger.info("Number of test records is: {}".format(
        self.number_of_test_records))
def preprocessing(overwrite=False):
    logger = Logger(name="preprocessing", log_level="INFO", file_log=False)
    logger.info("start")

    train = pd.read_csv("../input/train.csv")
    test = pd.read_csv("../input/test.csv")

    target_cols = list(set(train.columns) - set(test.columns))
    target = train.loc[:, target_cols]
    target.to_pickle("../input/target.pkl")
    logger.info("save target")

    generate_features(globals(), train, test, overwrite)
    logger.info("finish")
class DataGatherer(object):
    def __init__(self, cfg):
        self.cfg = cfg
        self.downloader = Downloader(cfg)
        self.logger = Logger(cfg)
        self.parser = AnotherHTMLParser(self.logger)
        self.pairs = set()
        self.db_handler = DBHandler(cfg)
        self._word_dict = None

    def read_raw_pairs(self, delimiter=',', limit=0):
        path = self.cfg['train_path']
        try:
            f = open(path)
        except IOError:
            self.logger.critical("Can't open file '{}'!".format(path))
            sys.exit()
        lines = f.read().split('\n')
        pairs = set()
        i = 0
        for line in lines:
            if not line:
                continue
            if limit and i > limit:
                break
            i += 1
            elements = line.split(delimiter)
            try:
                if elements[2] == 'left':
                    pair = (elements[0], elements[1])
                else:
                    pair = (elements[1], elements[0])
                if pair in pairs:
                    self.logger.warning('pair {} is duplicate!'.format(pair))
                    i -= 1
                pairs.add(pair)
            except IndexError:
                raise AssertionError('line {} is incorrect!'.format(line))
        return pairs

    def read_pairs(self, delimiter=',', limit=0):
        path = self.cfg['train_fixed_path']
        try:
            f = open(path)
        except IOError:
            self.logger.critical("Can't open file '{}'!".format(path))
            sys.exit()
        lines = f.read().split('\n')
        pairs = set()
        i = 0
        for line in lines:
            if not line:
                continue
            if limit and i > limit:
                break
            i += 1
            elements = line.split(delimiter)
            try:
                pair = tuple(elements)
                if pair in pairs:
                    self.logger.warning('pair {} is duplicate!'.format(pair))
                    i -= 1
                pairs.add(pair)
            except IndexError:
                raise AssertionError('line {} is incorrect!'.format(line))
        return pairs

    def exclude_untracked_videos(self, pairs):
        ids = set(self.db_handler.get_all_video_ids())
        pairs_set = set(pairs)
        for pair in pairs:
            for youtube_id in pair:
                if youtube_id not in ids:
                    pairs_set.remove(pair)
                    break
        return pairs_set

    def rewrite_pairs(self, pairs):
        pairs_fixed = self.exclude_untracked_videos(pairs)
        f = open(self.cfg['train_fixed_path'], 'wb')
        for pair in pairs_fixed:
            f.write(','.join(pair) + '\n')
        f.close()

    def fill_video_catalog(self, pairs, force=False):
        lefts_and_rights = zip(*pairs)
        ids = set(lefts_and_rights[0] + lefts_and_rights[1])
        if not force:
            ids_cache = set(self.db_handler.get_all_video_ids())
            ids.difference_update(ids_cache)
        for i, youtube_id in enumerate(ids):
            if i % 100 == 0:
                self.logger.info('scanned {} lines.'.format(i))
            self.add_video_by_id(youtube_id)

    def update_video_catalog(self, limit=None):
        ids_cache = set(self.db_handler.get_all_video_ids())
        for i, youtube_id in enumerate(ids_cache):
            if limit and i > limit:
                break
            self.update_video_by_id(youtube_id)

    def add_video_by_id(self, youtube_id):
        html = self.downloader.get_html(youtube_id)
        if not self.parser._check_video_availability(html):
            return
        video_item = Video(youtube_id)
        video_item.update(title=self.parser.get_video_title(html))
        self.db_handler.add_entry(video_item)

    def update_video_by_id(self, youtube_id):
        html = self.downloader.get_html(youtube_id)
        if not self.parser._check_video_availability(html):
            return
        video_item = self.db_handler.get_video_by_youtube_id(youtube_id)
        try:
            video_item.update(
                title=self.parser.get_video_title(html),
                views=self.parser.get_view_count(html),
                likes=self.parser.get_likes_count(html),
                dislikes=self.parser.get_dislikes_count(html),
            )
        except ParseError:
            video_item.mark_invalid()
        self.db_handler.commit()

    def update_rank1s(self, pairs):
        videos = self.db_handler.get_all_videos()
        rank1_map = self.get_rank1_map(pairs)
        for video in videos:
            if video.youtube_id in rank1_map:
                video.rank1 = rank1_map[video.youtube_id]
            else:
                self.logger.warning('video {} has no rank calculated!'.format(
                    video.youtube_id))
        self.db_handler.db_session.commit()

    def update_rank2s(self, catalog, pairs):
        videos = self.db_handler.get_all_videos()
        rank2_map = self.get_rank2_map(catalog, pairs)
        for video in videos:
            if video.youtube_id in rank2_map:
                video.rank2 = rank2_map[video.youtube_id]
            else:
                self.logger.warning('video {} has no rank calculated!'.format(
                    video.youtube_id))
        self.db_handler.db_session.commit()

    def update_views(self, force=False):
        if force:
            videos = self.db_handler.get_all_videos()
        else:
            videos = self.db_handler.db_session.query(Video).filter(
                Video.views == None).all()
        for video in videos:
            try:
                video.views = self.parser.get_view_count(
                    self.downloader.get_html(video.youtube_id))
            except ParseError:
                pass
        self.db_handler.commit()

    def get_video_catalog(self):
        return self.db_handler.get_all_video_data()

    def get_rank1_map(self, pairs):
        ids_above, ids_below = zip(*pairs)
        rank_map = defaultdict(lambda: 0)
        for youtube_id in ids_above:
            rank_map[youtube_id] += 1
        for youtube_id in ids_below:
            rank_map[youtube_id] -= 1
        return rank_map

    def get_rank2_map(self, catalog, pairs):
        chunks = partial_sort(catalog, pairs)
        aggregated_ranks = calculate_aggregated_ranks(chunks)
        assert len(aggregated_ranks) == len(chunks)
        ranked_chunks = zip(aggregated_ranks, chunks)
        r_map = {}
        for rank, chunk in ranked_chunks:
            for youtube_id in chunk:
                r_map[youtube_id] = rank
        return r_map

    def get_char_stat(self):
        characters = set()
        videos = self.db_handler.get_all_videos()
        for video in videos:
            if video.title:
                characters.update(video.title)
        return sorted(list(characters))

    def update_lang_stat(self):
        videos = self.db_handler.get_all_videos()
        for video in videos:
            if video.title:
                video.lang = get_lang(video.title)
        self.db_handler.commit()

    def get_all_words(self):
        words = defaultdict(lambda: 0)
        print 'delimiters: {}'.format(TITLE_DELIMITER)
        videos = self.db_handler.get_all_videos()
        for video in videos:
            for word in extract_words(video.title):
                words[prepare_word(word)] += 1
        return words

    def fill_word_db(self, words):
        for w, count in words.iteritems():
            word = Word(w, None, count)
            self.db_handler.db_session.add(word)
        self.db_handler.commit()

    def fill_words_for_videos(self):
        words = self.db_handler.db_session.query(Word).all()
        word_dict = {}
        for word in words:
            word_dict[word.word] = word
        videos = self.db_handler.get_all_videos()
        for video in videos:
            wordids = set()
            for word in extract_words(video.title):
                w = prepare_word(word)
                if w in word_dict:
                    wordids.add(word_dict[w].id)
            video.wordids = serialize_ids(wordids)
        self.db_handler.commit()

    def calculate_rank1_for_words(self):
        words = self.db_handler.db_session.query(Word).filter(
            Word.count >= 10).all()
        word_dict = {}
        for word in words:
            word_dict[word.id] = word
        rank_dict = defaultdict(lambda: [])
        videos = self.db_handler.get_all_videos()
        for video in videos:
            word_ids = deserialize_ids(video.wordids)
            for word_id in word_ids:
                if word_id not in word_dict:
                    continue
                rank_dict[word_id].append(video.rank1)
        for word_id in rank_dict:
            if word_id not in word_dict:
                continue
            word_dict[word_id].rank1 = mean(rank_dict.setdefault(word_id, [0]))
        # kostyl (hack): set rank = 0 for the empty word ''
        null_word = self.db_handler.db_session.query(Word).filter(
            Word.word == '').one()
        null_word.rank1 = 0
        # --
        self.db_handler.commit()

    def get_word_dict_by_word(self):
        if not self._word_dict:
            words = self.db_handler.db_session.query(Word).all()
            self._word_dict = {}
            for word in words:
                self._word_dict[word.word] = word
        return self._word_dict

    def calculate_title_rank(self, title, f):
        word_dict = self.get_word_dict_by_word()
        title_words = extract_words(title)
        title_rank = sum(f(word_dict[x]) for x in title_words
                         if x in word_dict)
        return title_rank
# Imports implied by the code below (module origins assumed)
import logging
import os

from apscheduler.schedulers.background import BackgroundScheduler
from flask import Flask
from pytz import utc

from config import JOBSTORES, EXECUTORS, JOB_DEFAULTS, LOGGER, LOG_TO
from utils import Logger

app = Flask(__name__)
app.config.from_object('config')

if not os.path.exists(LOG_TO):
    os.makedirs(LOG_TO)
fh = logging.FileHandler(os.path.join(LOG_TO, LOGGER.get('file')))
fh.setLevel(LOGGER.get('level'))
fh.setFormatter(LOGGER.get('formatter'))

log = Logger("Roller", fh)
log.info("Service started!")
sched_log = Logger("apscheduler.executors.default", fh)
sched_log_jobstore = Logger("apscheduler.jobstores.default", fh)

scheduler = BackgroundScheduler(jobstores=JOBSTORES,
                                executors=EXECUTORS,
                                job_defaults=JOB_DEFAULTS,
                                timezone=utc)
scheduler.start()
log.info("Scheduler started!")


def onstop():
    # Shut down the scheduler when exiting the app
    scheduler.shutdown()
    log.info("Scheduler shutdown")
    log.info("Service stopped!")
def execute(self):
    Logger.info(self.time, "Packet %s received at %s"
                           % (self.packet, self.destination))
    self.destination.receive(self.packet, self.time)
if not os.path.exists(config.LOG_TO):
    os.makedirs(config.LOG_TO)
if not os.path.exists(config.TMP_DIR):
    os.makedirs(config.TMP_DIR)

fh = logging.FileHandler(os.path.join(config.LOG_TO, config.LOGGER.file))
fh.setLevel(config.LOGGER.level)
fh.setFormatter(config.LOGGER.formatter)

pw_fh = logging.FileHandler(
    os.path.join(config.LOG_TO, config.LOGGER.peewee_file))
pw_fh.setLevel(config.LOGGER.level)
pw_fh.setFormatter(config.LOGGER.formatter)

peewee_log = Logger(pw_fh, "peewee")
log = Logger(fh, "My.trio")
log.info("Service started!")

recaptcha.init_app(app, GOOGLE_KEY, GOOGLE_SECRET_KEY)

from my_trio.accounts.views import register_page
from my_trio.accounts.errors import error_page
from my_trio.accounts import errors, views

app.register_blueprint(register_page)
app.register_blueprint(error_page)
# Imports implied by the code below
import json

import tornado.httpclient
import tornado.ioloop
import tornado.options
import tornado.web

from utils import Logger

VERSION = "1.0.0"


class MyHandler(tornado.web.RequestHandler):
    @tornado.web.asynchronous
    def get(self):
        def __callback(response):
            data = {
                'body': response.body[:20] + '...',
                'length': len(response.body)
            }
            self.set_header('Content-Type',
                            'application/json; charset="utf-8"')
            self.write(json.dumps(data))
            self.finish()

        async_client = tornado.httpclient.AsyncHTTPClient()
        request = tornado.httpclient.HTTPRequest("http://www.uol.com.br/")
        async_client.fetch(request, __callback)


application = tornado.web.Application([
    (r"/", MyHandler),
])

global logger
global http_server

if __name__ == "__main__":
    tornado.options.parse_command_line()
    logger = Logger('info', False)
    logger.info('starting webasync v%s' % VERSION)
    application.listen(8888)
    tornado.ioloop.IOLoop.instance().start()
def receive(self, packet, time):
    """
    Handles receipt of a packet.

    :param packet: Packet received
    :type packet: Packet | AckPacket
    :param time: Time the packet was received
    :type time: int
    :return: Nothing
    :rtype: None
    """
    Logger.info(time, "%s received packet %s." % (self, packet))
    # Ack packet, drop stored data that might need retransmission
    if isinstance(packet, AckPacket):
        flow_id = packet.flow_id
        Rn = packet.request_number
        if self.current_request_num is None:
            self.current_request_num = Rn
        else:
            self.current_request_num = max(Rn, self.current_request_num)
        # Receiving request number Rn means every packet with sequence
        # number <= Rn - 1 was received, so those have been acked. No need
        # to wait for their ack or to resend.
        acked_packets = []
        for packet_id, packet_data in self.awaiting_ack.items():
            acked_packet, _ = packet_data
            if acked_packet.sequence_number < Rn:
                acked_packets.append(packet_id)
        for acked_packet_id in acked_packets:
            acked_packet, sent_time = self.awaiting_ack[acked_packet_id]
            if acked_packet in self.queue:
                self.queue.remove(acked_packet)
            del self.awaiting_ack[acked_packet_id]
            self.dispatch(RTTEvent(flow_id, time, time - sent_time))
        self.congestion_control.handle_receive(packet, time)
        Sn, Sb, Sm = self.sequence_nums
        if Rn > Sb:
            Sm = Sm + (Rn - Sb)
            Sb = Rn
            Sn = Sb
        self.send_packets(time, flow_id)
        self.sequence_nums = (Sn, Sb, Sm)
    elif isinstance(packet, RoutingPacket):
        return
    # Regular packet, send acknowledgment of receipt
    elif isinstance(packet, FlowPacket):
        if packet.flow_id not in self.request_nums:
            self.request_nums[packet.flow_id] = 0
        if packet.sequence_number == self.request_nums[packet.flow_id]:
            Logger.warning(time, "Packet %d accepted from %s"
                                 % (packet.sequence_number, packet.src))
            self.request_nums[packet.flow_id] += 1
        else:
            Logger.info(time, "Incorrect packet received from %s. "
                              "Expected %d, got %d."
                              % (packet.src,
                                 self.request_nums[packet.flow_id],
                                 packet.sequence_number))
        ack_packet = AckPacket(packet.flow_id, self, packet.src,
                               self.request_nums[packet.flow_id], packet)
        self.send(ack_packet, time)
    # Ignore routing packets
    else:
        raise UnhandledPacketType
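# --- Illustration (not from the original codebase) ---------------------------
# A tiny sketch of the sequence-number bookkeeping above: an ACK carrying
# request number Rn acknowledges everything below Rn and slides the window
# tuple (Sn, Sb, Sm) so that the base Sb becomes Rn.
def slide_window(Sn, Sb, Sm, Rn):
    if Rn > Sb:
        Sm += Rn - Sb  # extend the window top by the newly acked span
        Sb = Rn        # new window base
        Sn = Sb        # next sequence number to send
    return Sn, Sb, Sm


print(slide_window(5, 3, 10, 6))  # (6, 6, 13)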
random.seed(2020)

# Command-line arguments
parser = argparse.ArgumentParser(description='binetwork recall')
parser.add_argument('--mode', default='valid')
parser.add_argument('--logfile', default='test.log')

args = parser.parse_args()
mode = args.mode
logfile = args.logfile

# Initialize logging
os.makedirs('../user_data/log', exist_ok=True)
log = Logger(f'../user_data/log/{logfile}').logger
log.info(f'binetwork recall, mode: {mode}')


def cal_sim(df):
    user_item_ = df.groupby('user_id')['click_article_id'].agg(
        list).reset_index()
    user_item_dict = dict(
        zip(user_item_['user_id'], user_item_['click_article_id']))

    item_user_ = df.groupby('click_article_id')['user_id'].agg(
        list).reset_index()
    item_user_dict = dict(
        zip(item_user_['click_article_id'], item_user_['user_id']))

    sim_dict = {}
class RTC(object):
    def __init__(self, dev=None, enable_ems=False):
        self.dev = dev
        self.logger = Logger('RTC')
        self.can_id = None  # TODO: needs further confirmation
        self.task = None
        self.ui_queue = None
        self.db = None
        self.enable_ems = enable_ems
        self.ems_data = Data() if self.enable_ems else None
        self.scheduler = BackgroundScheduler(daemon=True)
        self.set_scheduler()

    def api(self, cmd, parameters):
        self.logger.info('Real-Time Controller api() ' + ' ' + cmd + ' '
                         + json.dumps(parameters))
        if cmd == 'TURN_OFF':
            self.turn_off()
        elif cmd == 'TURN_ON':
            self.turn_on()

    def contingency_analysis_classifier(self, can_message):
        pass

    def turn_off(self):
        self.logger.info('TURNING OFF module')

    def turn_on(self):
        self.logger.info('TURNING ON module')
        if self.task is None:
            self.task = TurnOn(self, timeout=15)
        else:
            return False

    def hi(self):
        print(ctime(time()), end='\r', flush=True)
        if self.task is not None:
            self.task.state.spent_t += 0.5
            self.task.spent_t += 0.5
            self.task.run()
            if self.task.spent_t > self.task.TIMEOUT:
                self.logger.error('Task ' + self.task.name + ' fail...')
                self.task = None

    def get_ems_data(self):
        results_dispatch = self.db.query(DispatchDBData).limit(2)
        results_unit = self.db.query(UnitsDBData).limit(2)
        results_forecast = self.db.query(ForecastDBData).limit(2)
        # TODO: process every line of the result and send it to the GUI
        for d in results_dispatch:
            res = d.__dict__
            res.pop('_sa_instance_state')
            res['datetime1'] = (mktime(d.datetime1.timetuple())
                                + d.datetime1.microsecond / 1000000.0)
            self.ems_data.update(res)
        self.ui_queue.put(self.ems_data)

    def set_scheduler(self):
        self.scheduler.add_job(self.hi, 'interval', seconds=0.5)
        if self.enable_ems:
            if self.db is None:
                self.db = db_session()
            self.scheduler.add_job(self.get_ems_data, 'interval', seconds=1)
        self.scheduler.start()

    def is_bic_online(self):
        for dev in self.dev['bic']:
            if not dev.is_connected():
                self.logger.error((dev.name, ' is not online'))
                return False
        return True

    def minimum_system_online(self):
        for dev in self.dev['battery']:
            if not dev.is_connected():
                self.logger.error((dev.name, ' is not online'))
                return False
        for dev in self.dev['rm']:
            if not dev.is_connected():
                self.logger.error((dev.name, ' is not online'))
                return False
        return True
class IosFormatter:
    def __init__(self, reader, add_comment=True):
        self.reader = reader
        self.logger = Logger(tag=self.__class__.__name__)
        self.add_comment = add_comment

    def generate(self):
        en_strings = ''
        fr_strings = ''
        if self.add_comment:
            en_strings += '/* Auto-generated by Poor Man\'s Localization */\n'
            fr_strings += '/* Auto-generated by Poor Man\'s Localization */\n'

        self.logger.info("Parsing iOS strings")
        next(self.reader)  # skip the first line
        for line in self.reader:
            if line[1] == EXCLUSION_ID:
                continue
            en_strings += '"{}" = "{}"\n'.format(line[1], line[2])
            # Use the same string if translatable false is applied
            translated_string = (line[3], line[2])[line[3] == TRANSLATABLE_FALSE]
            fr_strings += '"{}" = "{}"\n'.format(line[1], translated_string)

        en_filename = self.__get_en_strings_file()
        fr_filename = self.__get_fr_strings_file()
        if not os.path.exists(os.path.dirname(en_filename)):
            try:
                os.makedirs(os.path.dirname(en_filename))
            except OSError as exc:  # Guard against race condition
                if exc.errno != errno.EEXIST:
                    raise
        if not os.path.exists(os.path.dirname(fr_filename)):
            try:
                os.makedirs(os.path.dirname(fr_filename))
            except OSError as exc:  # Guard against race condition
                if exc.errno != errno.EEXIST:
                    raise

        en_file = open(en_filename, 'w')
        en_file.write(en_strings)
        en_file.close()
        self.logger.info("Created: %s" % en_filename)

        fr_file = open(fr_filename, 'w')
        fr_file.write(fr_strings)
        fr_file.close()
        self.logger.info("Created: %s" % fr_filename)

    @staticmethod
    def __get_en_strings_file():
        return resource_path(
            os.path.join(OUTPUT_DIR, EN_LANG + DIR_LPROJ, STRINGS_FILENAME))

    @staticmethod
    def __get_fr_strings_file():
        return resource_path(
            os.path.join(OUTPUT_DIR, FR_LANG + DIR_LPROJ, STRINGS_FILENAME))
def execute(self):
    Logger.info(self.time, "Packet %s sent to link %s from %s"
                           % (self.packet, self.link.id, self.origin))
    self.link.send(self.time, self.packet, self.origin)
random.seed(seed)

# Command-line arguments
parser = argparse.ArgumentParser(description='lightgbm ranking')
parser.add_argument('--mode', default='valid')
parser.add_argument('--logfile', default='test.log')

args = parser.parse_args()
mode = args.mode
logfile = args.logfile

# Initialize logging
os.makedirs('../user_data/log', exist_ok=True)
log = Logger(f'../user_data/log/{logfile}').logger
log.info(f'lightgbm ranking, mode: {mode}')


def train_model(df_feature, df_query):
    df_train = df_feature[df_feature['label'].notnull()]
    df_test = df_feature[df_feature['label'].isnull()]

    del df_feature
    gc.collect()

    ycol = 'label'
    feature_names = list(
        filter(
            lambda x: x not in [ycol, 'created_at_datetime', 'click_datetime'],
            df_train.columns))
    feature_names.sort()