def extract_videos(basepath):
    Logger.debug("Generating videos")
    extractDir = basepath + "/extracted"
    infoFile = basepath + "/info.json"
    if not os.path.exists(infoFile):
        Logger.error("info file not found! Exiting.")
        return
    with open(infoFile) as f:
        info = json.load(f)

    # generate videos
    videoDir = basepath + '/videos'
    for topic in info['topics']:
        topicName = topic['topic']
        msgType = topic['type']
        # create videos from only the left lens/camera
        if msgType == 'sensor_msgs/Image' and 'left' in topicName:
            imagesDir = extractDir + topicName
            if not os.path.exists(imagesDir):
                Logger.error("Doesn't exist - " + imagesDir)
                continue
            make_dirs(videoDir)
            filename = topicName.replace('image_raw', '').replace('left', 'Left').replace('/', '') + '.mp4'
            videoFile = videoDir + '/' + filename
            Logger.debug('creating ' + videoFile)
            makeVideo(imagesDir, output=videoFile, fps=10)

    # Annotations dir for storing video comments
    annotDir = basepath + '/annotations'
    make_dirs(annotDir)
    os.chmod(annotDir, stat.S_IRWXU | stat.S_IRWXG | stat.S_ISGID | stat.S_IRWXO)
def __basegrid__(bond=None, grid=None):
    """Recursively assign [row, col] grid coordinates to each ose, starting from the first free C1 ose."""
    bonds = []
    if bond:
        Logger.debug(bond.getAttributString(), 0)
    if grid is None:
        grid = {}
        start = SingletonTopo.C1FREE[0]
        grid[start] = [0, 0]
        bonds = SingletonTopo.get_directedbond(start)
    else:
        row = grid[bond.parent_ose][0]
        col = grid[bond.parent_ose][1] - 1
        if bond.parent_carbon >= 5:
            row -= 1
        elif bond.parent_carbon in [2, 3]:
            row += 1
            if bond.parent_carbon == 2:
                col += 1
        grid[bond.child_ose] = [row, col]
        bonds = SingletonTopo.get_directedbond(bond.child_ose)
    if len(bonds) > 0:
        for b in bonds:
            SingletonTopo.__basegrid__(b, grid)
    return grid
class ArtistDao:
    TABLE_NAME = "artists"
    ARTIST_TO_GENRE_TABLE = "artist_to_genre"
    GENRE_TABLE = "genres"

    def __init__(self, id, name, popularity, follower_count, genres):
        self.id = id
        self.name = name
        self.popularity = popularity
        self.follower_count = follower_count
        if not isinstance(genres, list):
            raise ValueError(f'var "genres" must be a list, got {genres}')
        self.genres = genres
        self.logger = Logger(caller_filepath=__file__, clazz=self.__class__)

    def _insert_genres(self):
        conn = SqliteBoss.get_conn()
        c = conn.cursor()
        self.logger.debug(f"Inserting genres: {self.genres}")
        for i in self.genres:
            self.logger.debug(f"Inserting genre: {i}")
            c.execute('INSERT OR IGNORE INTO genres VALUES (?)', (i, ))
        conn.commit()
        conn.close()

    def _insert_genre_mappings(self):
        conn = SqliteBoss.get_conn()
        c = conn.cursor()
        for i in self.genres:
            c.execute('INSERT OR REPLACE INTO artist_to_genre VALUES (?,?)', (
                self.id,
                i,
            ))
        conn.commit()
        conn.close()

    def insert(self):
        conn = SqliteBoss.get_conn()
        c = conn.cursor()
        self._insert_genres()
        self._insert_genre_mappings()
        happened_at = datetime.datetime.now()
        c.execute('INSERT OR IGNORE INTO artists VALUES (?,?,?,?,?,?)', (
            self.id,
            self.name,
            self.popularity,
            self.follower_count,
            happened_at,
            happened_at,
        ))
        conn.commit()
        conn.close()
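# A minimal, hypothetical usage sketch for ArtistDao: the id, counts, and
# genres below are made-up values, and SqliteBoss is assumed to already be
# configured with the artists/genres/artist_to_genre tables created.
#
#   artist = ArtistDao(
#       "4Z8W4fKeB5YxbusRsdQVPb",            # hypothetical artist id
#       "Radiohead",
#       82,                                  # popularity
#       7500000,                             # follower count
#       ["art rock", "alternative rock"],
#   )
#   artist.insert()    # inserts genres and mappings, then the artist row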
def extract_lidar_points(bag, extractPath):
    topics = getTopics(bag, 'sensor_msgs/PointCloud2')
    Logger.debug(('interested lidar topics = %s \n' % topics))
    for topic in topics:
        extractDir = extractPath + '/' + topic
        make_dirs(extractDir)
        frame = 0
        timestampFile = extractPath + topic + "/timestamp.csv"
        timestampsInSec, timestampsInString = [], []
        for tpic, msg, time in tqdm(bag.read_messages(topics=[topic]),
                                    unit=" frame", desc="extracting " + topic):
            timestampSec = msg.header.stamp.to_time()
            dtime = datetime.fromtimestamp(timestampSec)
            timestampString = dtime.strftime(TIME_FILE_NAME_FORMAT)
            timestampsInSec.append(timestampSec)
            timestampsInString.append(timestampString)
            data = []
            for point in sensor_msgs.point_cloud2.read_points(msg, skip_nans=True):
                data.append({
                    'x': point[0],  # x coordinate
                    'y': point[1],  # y coordinate
                    'z': point[2],  # z coordinate
                    'i': point[3],  # intensity
                    'r': point[4]   # ring
                })
            frameFile = extractDir + "/" + str(frame).zfill(7) + ".json"
            with open(frameFile, 'w') as f:
                f.write(json.dumps(data))
            frame += 1

        # write timestamps of this topic
        df = pd.DataFrame(columns=['sec', 'datetime'])
        df['sec'] = timestampsInSec
        df['datetime'] = timestampsInString
        df.index.name = 'frame'
        df.to_csv(timestampFile, float_format='%.7f')
def execute(self):
    # If the packet that was sent to trigger this link free event is still
    # on the link, that's fine; this event is what means that it's off the
    # link, so we can remove it.
    if self.packet in self.link.packets_on_link[3 - self.direction]:
        self.link.packets_on_link[3 - self.direction].remove(self.packet)
    # Now, we check that there's nothing on the other side of the link.
    # If the link is currently sending data in the other direction, we
    # can't do anything here.
    if self.link.packets_on_link[3 - self.direction] != []:
        return
    destination = self.link.get_node_by_direction(self.direction)
    origin = self.link.get_node_by_direction(3 - self.direction)
    Logger.debug(self.time, "Link %s freed towards node %d (%s)"
                 % (self.link.id, self.direction, destination))
    self.link.in_use = False
    self.link.current_dir = None
    next_packet_in_dir = self.link.buffer.pop_from_buffer(self.direction,
                                                          self.time)
    if next_packet_in_dir is not None:
        Logger.debug(self.time, "Buffer exists toward node %d" % (self.direction))
        self.link.send(self.time, next_packet_in_dir, origin, from_free=True)
def extract_bag_info(bag, base_path):
    info_file = base_path + '/info.json'
    baginfo = yaml.load(bag._get_yaml_info())
    with open(info_file, 'w+') as f:
        f.write('%s' % json.dumps(baginfo))
    Logger.debug(json.dumps(baginfo))
def extract_images(bag, extractPath, img_type="raw", hist=False):
    topic_raw_image = getTopics(bag, 'sensor_msgs/Image')
    Logger.debug(('interested image topics = %s' % topic_raw_image))
    jobs = []
    for topic in topic_raw_image:
        #p = Process(target=extract_topic_images, args=(bag, topic, extractPath, img_type, hist, ))
        #jobs.append(p)
        #p.start()
        extract_topic_images(bag, topic, extractPath, img_type, hist)
def handle_routing_packet(self, packet, dynamic):
    """
    Updates the cost and routing tables using the given routing packet

    :param packet: Routing packet to update tables for
    :type packet: RoutingPacket
    :param dynamic: Whether we're handling a dynamic or static packet
    :type dynamic: bool
    :return: Nothing
    :rtype: None
    """
    # No routing table yet. Begin creation, then handle this packet
    if not self._get_intermediate_routing_table(dynamic):
        self.create_routing_table(dynamic)
    did_update = False
    cost_table = packet.costTable
    src_id = packet.src.id

    # Get the appropriate routing table
    routing_table = self._get_intermediate_routing_table(dynamic)
    # Update costs by adding the cost to travel to the source node
    src_cost = routing_table[src_id].cost
    for identifier in cost_table.keys():
        cost_table[identifier] = cost_table[identifier] + src_cost
    src_link = routing_table[src_id].link

    # Update our routing table based on the received table
    for identifier, cost in cost_table.items():
        # New entry to tables or smaller cost
        if identifier not in routing_table or \
                cost < routing_table[identifier].cost:
            did_update = True
            routing_table[identifier] = LinkCostTuple(src_link, cost)

    # Store and broadcast the updated table if an update occurred
    if did_update:
        self.sameDataCounter = 0
        self.store_routing_table(dynamic, routing_table)
        new_cost_table = self.cost_table_from_routing_table(dynamic)
        self.broadcast_table(new_cost_table, dynamic)
    else:
        self.sameDataCounter += 1
        # Log the same data receipt
        Logger.debug(Network.get_time(), "%s received no new routing table "
                                         "data." % self)
        # Log finalized routing table
        Logger.trace(Network.get_time(), "%s final %s routing table:"
                     % (self, "dynamic" if dynamic else "static"))
        if dynamic:
            self.handle_same_dynamic_routing_table()
class _Job(object):
    def __init__(self):
        self.log = Logger(self.__class__.__name__, fh)
        self.log.debug("Job is created")

    def execute(self, **kwargs):
        try:
            self.log.debug("Start job with kwargs=%s" % kwargs)
            self._execute(**kwargs)
            self.log.debug("Finished job successfully")
        except Exception as e:
            self.log.exception("Error during job execution")
            subject = 'Tasker Information. An error occurred in script %s' % self.__class__.__name__
            self.log.debug(subject)
            # send_email(subject, as_text(e.message),
            #            send_from=SMTP_SETTINGS['username'],
            #            server=SMTP_SETTINGS['server'],
            #            port=SMTP_SETTINGS['port'],
            #            user=SMTP_SETTINGS['username'],
            #            passwd=SMTP_SETTINGS['password'],
            #            dest_to=ERROR_EMAILS)

    def _execute(self, **kwargs):
        raise NotImplementedError("%s._execute" % self.__class__.__name__)

    @classmethod
    def run(cls, **kwargs):
        log.debug("in _Job.run!")
        return cls().execute(**kwargs)
def get_subname(identifier):
    """
    :param identifier: identification number
    :type: int
    :return: the substituent name according to the identifier
    :rtype: string
    """
    s = SubstitutionLibrary.getSub(identifier)
    if s:
        return s.name
    Logger.debug("identifier not found: %i" % identifier, 1)
    return ""
def doAction(gpsFilePath):
    Logger.debug("reading gps data from " + gpsFilePath)
    gpsData = None
    with open(gpsFilePath) as f:
        gpsData = json.load(f)
    if gpsData is None or gpsData["data"] is None:
        Logger.error("No gps data found in " + gpsFilePath)
        return
    data = gpsData["data"]
    distance = calcDistance(data)
    Logger.debug("distance = " + str(distance) + " km")
    return distance
def findFilename(name):
    # connecting to wwe database
    Logger.debug("connecting to database...")
    setupdb()
    for bagfile in RawBagfile.objects(filename=name):
        print("filename = " + bagfile.filename)
        print("key = " + bagfile.key)
        print("vendor = " + bagfile.vendor)
        print("hsize = " + bagfile.hsize)
        print("hduration = " + bagfile.hduration)
        print("location = " + bagfile.location)
        print("capturedate = " + bagfile.capturedate)
        print("path = " + bagfile.path)
        print("distance = " + bagfile.distance)
def download_file_by_url(url, saveAs):
    """ Download file by url. """
    Logger.debug("Url to download: {}".format(url))
    try:
        resp = requests.get(url)
        with open(saveAs, "wb") as f:
            f.write(resp.content)
    except Exception as e:
        Logger.error("Error occurred while downloading file:")
        Logger.error(str(e))
        return False
    return True
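# Hypothetical usage sketch (the URL and target path below are placeholders,
# not values from this project):
#
#   if download_file_by_url("https://example.com/data.zip", "/tmp/data.zip"):
#       Logger.debug("Download finished")
#   else:
#       Logger.error("Download failed")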
class IndexController(object):
    def __init__(self, request, scheduler):
        self.request = request
        self.scheduler = scheduler
        self.log = Logger(self.__class__.__name__, fh)

    def call(self):
        try:
            self.log.debug("Start process request: %s" % self.request)
            sched_status = self.scheduler.state
            job_list = JobListController(self.request, self.scheduler).call()
            data = render_template('index.html', data=job_list, status=sched_status)
            self.log.debug("Finished")
            return data
        except Exception as e:
            self.log.exception('Error during %s call' % self.__class__.__name__)
            return render_template('error.html', errors=[e.message])
class SchedulerController(object):
    def __init__(self, request, scheduler):
        self.log = Logger(self.__class__.__name__, fh)
        self._request = request
        self._scheduler = scheduler

    def call(self, *args, **kwargs):
        try:
            request_info = get_request_info(self._request)
            self.log.debug("Start process request: %s, %s, %s" %
                           (request_info.url, request_info.data, request_info.method))
            data = self._call(*args, **kwargs)
            self.log.debug('Finished')
            return data
        except Exception as e:
            self.log.exception('Error during %s call' % self.__class__.__name__)
            return render_template('error.html', error=e.message)  # TODO: template
def extract_all(bag, basepath):
    extractDir = basepath + "/extracted"
    # extract bag info
    extract_bag_info(bag, basepath)
    # extract gps
    extract_gps_info(bag, extractDir)
    # extract lidar
    extract_lidar_points(bag, extractDir)
    # extract images
    extract_images(bag, extractDir, img_type="jpeg", hist=False)

    with open(basepath + "/info.json") as f:
        info = json.load(f)

    # collate timestamps csv
    Logger.debug('creating timestamps.csv')
    # we are interested only in images, lidar, gps and imu data types
    interestedTypes = ['sensor_msgs/Image', 'sensor_msgs/PointCloud2',
                       'sensor_msgs/NavSatFix', 'sensor_msgs/Imu']
    dataFrames = []
    topicNames = []
    for topic in info['topics']:
        topicName = topic['topic']
        msgType = topic['type']
        timestampFile = extractDir + topicName + "/timestamp.csv"
        if msgType in interestedTypes and os.path.exists(timestampFile):
            Logger.debug('reading timestamps from ' + timestampFile)
            df = pd.read_csv(timestampFile, index_col=['frame'], usecols=['frame', 'sec'])
            df.columns = [topicName]
            dataFrames.append(df)
            topicNames.append(topicName)
    allFrames = pd.concat(dataFrames, ignore_index=True, axis=1)
    allFrames.columns = topicNames
    make_dirs(basepath + '/analytics')
    allFrames.to_csv(basepath + '/analytics/timestamps.csv', float_format='%.7f')
    Logger.debug('timestamps written to ' + basepath + '/analytics/timestamps.csv')

    # generate videos
    videoDir = basepath + '/videos'
    for topic in info['topics']:
        topicName = topic['topic']
        msgType = topic['type']
        # create videos from only the left lens/camera
        if msgType == 'sensor_msgs/Image' and 'left' in topicName:
            imagesDir = extractDir + topicName
            make_dirs(videoDir)
            filename = topicName.replace('image_raw', '').replace('left', 'Left').replace('/', '') + '.mp4'
            videoFile = videoDir + '/' + filename
            Logger.debug('creating ' + videoFile)
            makeVideo(imagesDir, output=videoFile, fps=10)

    # Annotations dir for storing video comments
    annotDir = basepath + '/annotations'
    make_dirs(annotDir)
    os.chmod(annotDir, stat.S_IRWXU | stat.S_IRWXG | stat.S_ISGID | stat.S_IRWXO)
def flushdb():
    # connecting to wwe database
    Logger.debug("connecting to database...")
    setupdb()
    Logger.debug("proceeding to delete entries...")
    for bagfile in RawBagfile.objects():
        Logger.debug("deleting " + bagfile.filename)
        bagfile.delete()
    Logger.debug("finished!")
def send(self, time, packet, origin, from_free=False):
    """
    Sends a packet to a destination.

    Args:
        time (int):           The time at which the packet was sent.
        packet (Packet):      The packet.
        origin (Host|Router): The node origin of the packet.
    """
    origin_id = self.get_direction_by_node(origin)
    dst_id = 3 - origin_id
    destination = self.get_node_by_direction(dst_id)
    if self.in_use or self.packets_on_link[origin_id] != []:
        if self.current_dir is not None:
            Logger.debug(time, "Link %s in use, currently sending to node "
                               "%d (trying to send %s)"
                               % (self.id, self.current_dir, packet))
        else:
            Logger.debug(time, "Link %s in use, currently sending to node "
                               "%d (trying to send %s)"
                               % (self.id, origin_id, packet))
        if self.buffer.size() >= self.buffer_size:
            # Drop packet if buffer is full
            Logger.debug(time, "Buffer full; packet %s dropped." % packet)
            self.dispatch(DroppedPacketEvent(time, self.id))
            return
        self.buffer.add_to_buffer(packet, dst_id, time)
    else:
        if not from_free and self.buffer.buffers[dst_id] != []:
            # Since events are not necessarily executed in the order we
            # would expect, there may be a case where the link was free
            # (nothing on the other side and nothing currently being put
            # on) but the actual event had not yet fired.
            #
            # In such a case, the buffer will not have been popped from
            # yet, so put the packet we want to send on the buffer and
            # take the first packet instead.
            self.buffer.add_to_buffer(packet, dst_id, time)
            packet = self.buffer.pop_from_buffer(dst_id, time)
        Logger.debug(time, "Link %s free, sending packet %s to %s"
                     % (self.id, packet, destination))
        self.in_use = True
        self.current_dir = dst_id
        transmission_delay = self.transmission_delay(packet)

        self.dispatch(PacketSentOverLinkEvent(time, packet, destination, self))

        # Link will be free to send to same spot once packet has passed
        # through fully, but not to send from the current destination until
        # the packet has completely passed.
        # Transmission delay is delay to put a packet onto the link
        self.dispatch(LinkFreeEvent(time + transmission_delay, self, dst_id, packet))
        self.dispatch(LinkFreeEvent(time + transmission_delay + self.delay,
                                    self, self.get_other_id(dst_id), packet))
        self.update_link_throughput(time, packet,
                                    time + transmission_delay + self.delay)
def update_link_throughput(self, time, packet, time_received):
    """
    Update the link throughput

    :param time: Time when this update is occurring
    :type time: float
    :param packet: Packet we're updating the throughput with
    :type packet: Packet
    :param time_received: Time the packet was received at the other node
    :type time_received: float
    :return: Nothing
    :rtype: None
    """
    self.bytesSent += packet.size()
    self.sendTime = time_received
    assert self.sendTime != 0, "Packet should not be received at time 0."
    throughput = (8 * self.bytesSent) / (self.sendTime / 1000)  # bits/s
    Logger.debug(time, "%s throughput is %f" % (self, throughput))
    self.dispatch(LinkThroughputEvent(time, self.id, throughput))
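# Worked example of the throughput formula above, with made-up numbers and
# assuming sendTime is in milliseconds (as the division by 1000 suggests):
# if bytesSent has accumulated to 1_000_000 bytes and the last packet was
# received at time_received = 500 ms, then
#   throughput = (8 * 1_000_000) / (500 / 1000) = 16_000_000 bits/s
# i.e. bytes are converted to bits and the millisecond timestamp to seconds.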
class Scraper(object):
    def __init__(self, verbose=False):
        self.logger = Logger(verbose=verbose)
        self._targets = targets
        self._parser = None

    def urlinfo(self, url):
        """ Initializes Parser Model. Returns URL target information
            if a match against known patterns is found.
        """
        if not self._parser:
            self._parser = BaseParser(self)
        try:
            info = self._parser.urlinfo(url, init=True)
        except TargetPatternNotFound:
            self.logger.debug(' >> Target Pattern Not Found')
            return {}
        else:
            if 'parser' in info:
                self._parser = info['parser']
                self.logger.debug('Using model "%s"' % self._parser.name)
            return info

    def parse(self, url, headers=None, proxy=None):
        """ Returns Tree object """
        if not self._parser:
            self._parser = BaseParser(self)
        return self._parser.parse(url, headers, proxy)

    def get_deals(self, url):
        if not url:
            return []
        self.urlinfo(url)  # initialize parser model
        return self._parser.get_deals(url)

    def get_deal(self, url=''):
        if not url:
            return {}
        try:
            self.urlinfo(url)  # initialize parser model
        except TargetPatternNotFound:
            self.logger.debug(' >> Target Pattern Not Found')
            return {}
        else:
            try:
                return self._parser.get_deal(url)
            except ElementMissing as e:
                self.logger.debug(' >> Element Missing - {:s}'.format(e))
                return {}
def launch_request(self, waitTime=0):
    """ Launch request. """
    self.__response = None
    url = self.url
    if not url:
        url = "http://{}:{}/{}".format(self.host, self.port, self.path)
    Logger.debug("Launch request to: {}".format(url))
    if self.doPost:
        self.__response = requests.post(url, headers=self.headers, data=self.data)
    else:
        self.__response = requests.get(url, headers=self.headers)
    if waitTime:
        time.sleep(waitTime)
def run(self):
    self.start_animation()
    while True:
        #print ('\nwaiting to receive message')
        try:
            data, address = self.sock.recvfrom(4096)
            #print ('received %s bytes from %s' % (len(data), address))
            #print (data.decode('utf-8'))

            # parse the message
            msg = json.loads(data.decode('utf-8'))
            for name in msg:
                if name in self.leds:
                    led = self.leds[name]
                    if msg[name] == 'on':
                        led.on()
                    elif msg[name] == 'off':
                        led.off()
                    elif msg[name] == 'blink':
                        if "delay" in msg:
                            led.blink(float(msg['delay']))
                        else:
                            led.blink()
                    if 'time' in msg and float(msg['time']) > 0:
                        led.message_validity_time = time.time() + float(msg['time'])
        except socket.timeout:
            pass
        except (KeyboardInterrupt, SystemExit):
            Logger.debug("Interrupted or Exit")
            break
        except:
            traceback.print_exc()
        self.update()
def update_single_info(infofile):
    Logger.debug('reading info from ' + infofile)
    with open(infofile, 'r') as f:
        baginfo = json.load(f)

    # connect to wwe database
    Logger.debug("connecting to database...")
    setupdb()

    # split infofile path. ex: raw_dataset/intel/bangalore/2018_01_11/2018-01-11_15-40-44/info.json
    pathinfo = infofile.split("/")
    vendor = pathinfo[1]
    cdate = pathinfo[-3]
    size = humanfriendly.format_size(baginfo['size'])
    duration = humanfriendly.format_timespan(baginfo['duration'])
    name = baginfo['path'].split("/")[-1]
    name = name[:10] + "_" + name[11:]
    dist = "0"
    if 'distance' in baginfo and 'dist' in baginfo['distance']:
        dist = "{0:.5f} ".format(baginfo['distance']['dist']) + baginfo['distance']['unit']
        Logger.debug("adding distance: " + dist)

    # add relevant locations as necessary
    # TODO: pick up locations from a config file instead of manual checks
    if "bangalore" in infofile:
        loc = "bangalore"
    elif "telangana" in infofile:
        loc = "hyderabad"
    elif "hyderabad" in infofile:
        loc = "hyderabad"
    else:
        loc = "unknown"

    rawbagfile = RawBagfile(key=genMd5(infofile), vendor=vendor, hsize=size,
                            hduration=duration, filename=name, location=loc,
                            capturedate=cdate, path=infofile, distance=dist,
                            info=baginfo)
    duplicateFound = False
    # for existing entries
    for bagfile in RawBagfile.objects(key=rawbagfile.key):
        Logger.warn("found entry with duplicate key - " + bagfile.key)
        duplicateFound = True
        bagfile.delete()
    for bagfile in RawBagfile.objects(filename=rawbagfile.filename):
        Logger.warn("found entry with duplicate filename - " + bagfile.filename)
        duplicateFound = True
        bagfile.delete()

    # save new info if no duplicates
    #if not duplicateFound:
    #    debugPrint("updating db with new info...")
    rawbagfile.save()
class SpoopyManager:
    def __init__(self, spoopy_client, sqlite_boss):
        self.logger = Logger(caller_filepath=__file__, clazz=self.__class__)
        self.spoopy_client = spoopy_client
        self.sqlite_boss = sqlite_boss
        self.setup()

    def setup(self):
        self.sqlite_boss.create_db()

    def sync_artists_metadata(self):
        self.logger.log("Starting to sync artist metadata")
        for results in self.spoopy_client.get_artists_by_search_terms():
            for result in results['artists']['items']:
                artist = ArtistDao(
                    result['id'],
                    result['name'],
                    result['popularity'],
                    result['followers']['total'],
                    result['genres'],
                )
                self.logger.debug(f"Inserting record for artist {result['name']}")
                artist.insert()
def extract_sensor_info(bag, topic, extractPath):
    topic_sensor_info = getTopics(bag, topic)
    Logger.debug(('interested topics = %s \n' % topic_sensor_info))
    if not topic_sensor_info:
        Logger.warn('no topics available to extract data!')
        return

    # create dir for each topic
    for topic in topic_sensor_info:
        make_dirs(extractPath + topic)
        msgs = []
        json_dump_file = extractPath + topic + "/data.json"
        timestampFile = extractPath + topic + "/timestamp.csv"
        timestampsInSec, timestampsInString = [], []
        for tpic, msg, time in tqdm(bag.read_messages(topics=[topic]),
                                    unit=" frame", desc="extracting " + topic):
            timestampSec = msg.header.stamp.to_time()
            dtime = datetime.fromtimestamp(timestampSec)
            timestampString = dtime.strftime(TIME_FILE_NAME_FORMAT)
            timestampsInSec.append(timestampSec)
            timestampsInString.append(timestampString)
            sensor_info_file = extractPath + topic + "/" + dtime.strftime(TIME_FILE_NAME_FORMAT) + ".txt"
            data = {}
            msgs.append(msg)
            if not os.path.exists(sensor_info_file):
                Logger.debug('extracted ' + sensor_info_file)
                #f = open(sensor_info_file, 'w+')
                #f.write('%s\n' % msg)
                #f.close()
            else:
                Logger.warn("file exists. skipping write: " + sensor_info_file)

        df = pd.DataFrame(columns=['sec', 'datetime'])
        df['sec'] = timestampsInSec
        df['datetime'] = timestampsInString
        df.index.name = 'frame'
        df.to_csv(timestampFile, float_format='%.7f')

        sensordata = {}
        sensordata["data"] = msgs
        with open(json_dump_file, "w+") as f:
            f.write('%s' % sensordata)
        Logger.debug("dumped binary data to " + json_dump_file)
    except Exception as e:
        pass

    return sim


if __name__ == '__main__':
    if mode == 'valid':
        df_feature = pd.read_pickle('../user_data/data/offline/recall.pkl')
        df_click = pd.read_pickle('../user_data/data/offline/click.pkl')
    else:
        df_feature = pd.read_pickle('../user_data/data/online/recall.pkl')
        df_click = pd.read_pickle('../user_data/data/online/click.pkl')

    # article features
    log.debug(f'df_feature.shape: {df_feature.shape}')

    df_article = pd.read_csv('../tcdata/articles.csv')
    df_article['created_at_ts'] = df_article['created_at_ts'] / 1000
    df_article['created_at_ts'] = df_article['created_at_ts'].astype('int')
    df_feature = df_feature.merge(df_article, how='left')
    df_feature['created_at_datetime'] = pd.to_datetime(df_feature['created_at_ts'], unit='s')

    log.debug(f'df_article.head(): {df_article.head()}')
    log.debug(f'df_feature.shape: {df_feature.shape}')
    log.debug(f'df_feature.columns: {df_feature.columns.tolist()}')

    # features from the click history
    df_click.sort_values(['user_id', 'click_timestamp'], inplace=True)
    df_click.rename(columns={'click_article_id': 'article_id'}, inplace=True)
def main(*kargs, **kwargs):
    get_kwargs(kwargs)
    train_fname = kwargs['train']
    test_fname = kwargs['test']
    result_fname = kwargs['output']
    embeds_fname = kwargs['embeds']
    logger_fname = kwargs['logger']
    swear_words_fname = kwargs['swear_words']
    wrong_words_fname = kwargs['wrong_words']
    warm_start = kwargs['warm_start']
    format_embeds = kwargs['format_embeds']

    cnn_model_file = 'data/cnn.h5'
    lstm_model_file = 'data/lstm.h5'
    concat_model_file = 'data/concat.h5'
    lr_model_file = 'data/{}_logreg.bin'
    meta_catboost_model_file = 'data/{}_meta_catboost.bin'

    # ====Create logger====
    logger = Logger(logging.getLogger(), logger_fname)

    # ====Load data====
    logger.info('Loading data...')
    train_df = load_data(train_fname)
    test_df = load_data(test_fname)
    target_labels = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
    num_classes = len(target_labels)

    # ====Load additional data====
    logger.info('Loading additional data...')
    swear_words = load_data(swear_words_fname, func=lambda x: set(x.T[0]), header=None)
    wrong_words_dict = load_data(wrong_words_fname, func=lambda x: {val[0]: val[1] for val in x})
    tokinizer = RegexpTokenizer(r'\w+')
    regexps = [re.compile("([a-zA-Z]+)([0-9]+)"), re.compile("([0-9]+)([a-zA-Z]+)")]

    # ====Load word vectors====
    logger.info('Loading embeddings...')
    embed_dim = 300
    embeds = Embeds(embeds_fname, 'fasttext', format=format_embeds)

    # ====Clean texts====
    logger.info('Cleaning text...')
    if warm_start:
        logger.info('Use warm start...')
    else:
        train_df['comment_text_clear'] = clean_text(train_df['comment_text'], tokinizer, wrong_words_dict, swear_words, regexps)
        test_df['comment_text_clear'] = clean_text(test_df['comment_text'], tokinizer, wrong_words_dict, swear_words, regexps)
        train_df.to_csv(train_clear, index=False)
        test_df.to_csv(test_clear, index=False)

    # ====Calculate maximum seq length====
    logger.info('Calc text length...')
    train_df.fillna('unknown', inplace=True)
    test_df.fillna('unknown', inplace=True)
    train_df['text_len'] = train_df['comment_text_clear'].apply(lambda words: len(words.split()))
    test_df['text_len'] = test_df['comment_text_clear'].apply(lambda words: len(words.split()))
    max_seq_len = np.round(train_df['text_len'].mean() + 3 * train_df['text_len'].std()).astype(int)
    logger.debug('Max seq length = {}'.format(max_seq_len))

    # ====Prepare data to NN====
    logger.info('Converting texts to sequences...')
    max_words = 100000
    train_df['comment_seq'], test_df['comment_seq'], word_index = convert_text2seq(
        train_df['comment_text_clear'].tolist(), test_df['comment_text_clear'].tolist(),
        max_words, max_seq_len, lower=True, char_level=False)
    logger.debug('Dictionary size = {}'.format(len(word_index)))

    logger.info('Preparing embedding matrix...')
    embedding_matrix, words_not_found = get_embedding_matrix(embed_dim, embeds, max_words, word_index)
    logger.debug('Embedding matrix shape = {}'.format(np.shape(embedding_matrix)))
    logger.debug('Number of null word embeddings = {}'.format(np.sum(np.sum(embedding_matrix, axis=1) == 0)))

    # ====Train/test split data====
    x = np.array(train_df['comment_seq'].tolist())
    y = np.array(train_df[target_labels].values)
    x_train_nn, x_test_nn, y_train_nn, y_test_nn, train_idxs, test_idxs = split_data(
        x, y, test_size=0.2, shuffle=True, random_state=42)
    test_df_seq = np.array(test_df['comment_seq'].tolist())

    # ====Train models====
    # CNN
    logger.info("training CNN ...")
    cnn = get_cnn(embedding_matrix, num_classes, embed_dim, max_seq_len,
                  num_filters=64, l2_weight_decay=0.0001, dropout_val=0.5,
                  dense_dim=32, add_sigmoid=True)
    cnn_hist = train(x_train_nn, y_train_nn, cnn, batch_size=256, num_epochs=100,
                     learning_rate=0.005, early_stopping_delta=0.0001,
                     early_stopping_epochs=3, use_lr_stratagy=True,
                     lr_drop_koef=0.66, epochs_to_drop=2, logger=logger)
    y_cnn = cnn.predict(x_test_nn)
    save_predictions(test_df, cnn.predict(test_df_seq), target_labels, 'cnn')
    metrics_cnn = get_metrics(y_test_nn, y_cnn, target_labels, hist=cnn_hist, plot=False)
    logger.debug('CNN metrics:\n{}'.format(metrics_cnn))
    cnn.save(cnn_model_file)

    # LSTM
    logger.info("training LSTM ...")
    lstm = get_lstm(embedding_matrix, num_classes, embed_dim, max_seq_len,
                    l2_weight_decay=0.0001, lstm_dim=50, dropout_val=0.3,
                    dense_dim=32, add_sigmoid=True)
    lstm_hist = train(x_train_nn, y_train_nn, lstm, batch_size=256, num_epochs=100,
                      learning_rate=0.005, early_stopping_delta=0.0001,
                      early_stopping_epochs=3, use_lr_stratagy=True,
                      lr_drop_koef=0.66, epochs_to_drop=2, logger=logger)
    y_lstm = lstm.predict(x_test_nn)
    save_predictions(test_df, lstm.predict(test_df_seq), target_labels, 'lstm')
    metrics_lstm = get_metrics(y_test_nn, y_lstm, target_labels, hist=lstm_hist, plot=False)
    logger.debug('LSTM metrics:\n{}'.format(metrics_lstm))
    lstm.save(lstm_model_file)

    # CONCAT
    logger.info("training Concat NN (LSTM + CNN) ...")
    concat = get_concat_model(embedding_matrix, num_classes, embed_dim, max_seq_len,
                              num_filters=64, l2_weight_decay=0.0001, lstm_dim=50,
                              dropout_val=0.5, dense_dim=32, add_sigmoid=True)
    concat_hist = train([x_train_nn, x_train_nn], y_train_nn, concat, batch_size=256,
                        num_epochs=100, learning_rate=0.005, early_stopping_delta=0.0001,
                        early_stopping_epochs=4, use_lr_stratagy=True,
                        lr_drop_koef=0.66, epochs_to_drop=3, logger=logger)
    y_concat = concat.predict([x_test_nn, x_test_nn])
    save_predictions(test_df, concat.predict([test_df_seq, test_df_seq]), target_labels, 'concat')
    metrics_concat = get_metrics(y_test_nn, y_concat, target_labels, hist=concat_hist, plot=False)
    logger.debug('Concat_NN metrics:\n{}'.format(metrics_concat))
    concat.save(concat_model_file)

    # TFIDF + LogReg
    logger.info('training LogReg over tfidf...')
    train_tfidf, val_tfidf, test_tfidf, word_tfidf, char_tfidf = get_tfidf(
        train_df['comment_text_clear'].values[train_idxs],
        train_df['comment_text_clear'].values[test_idxs],
        test_df['comment_text_clear'].values)
    models_lr = []
    metrics_lr = {}
    y_tfidf = []
    for i, label in enumerate(target_labels):
        model = NbSvmClassifier(C=4.0, solver='sag', max_iter=1000)
        model.fit(train_tfidf, y_train_nn[:, i])
        y_tfidf.append(model.predict_proba(val_tfidf)[:, 1])
        test_df['tfidf_{}'.format(label)] = model.predict_proba(test_tfidf)[:, 1]
        metrics_lr[label] = calc_metrics(y_test_nn[:, i], y_tfidf[-1])
        models_lr.append(model)
        joblib.dump(model, lr_model_file.format(label))
    metrics_lr['Avg logloss'] = np.mean([metric[0] for label, metric in metrics_lr.items()])
    logger.debug('LogReg(TFIDF) metrics:\n{}'.format(metrics_lr))

    # Bow for catboost
    top_pos_words = []
    top_neg_words = []
    for i in range(num_classes):
        top_pos_words.append([])
        top_neg_words.append([])
        top_pos_words[-1], top_neg_words[-1] = get_most_informative_features(
            [word_tfidf, char_tfidf], models_lr[i], n=100)
    top_pos_words = list(set(np.concatenate([[val for score, val in top] for top in top_pos_words])))
    top_neg_words = list(set(np.concatenate([[val for score, val in top] for top in top_neg_words])))
    top = list(set(np.concatenate([top_pos_words, top_neg_words])))
    train_bow = get_bow(train_df['comment_text_clear'].values[train_idxs], top)
    val_bow = get_bow(train_df['comment_text_clear'].values[test_idxs], top)
    test_bow = get_bow(test_df['comment_text_clear'].values, top)
    logger.debug('Count bow words = {}'.format(len(top)))

    # Meta catboost
    logger.info('training catboost as metamodel...')
    train_df['text_unique_len'] = train_df['comment_text_clear'].apply(calc_text_uniq_words)
    test_df['text_unique_len'] = test_df['comment_text_clear'].apply(calc_text_uniq_words)
    train_df['text_unique_koef'] = train_df['text_unique_len'] / train_df['text_len']
    test_df['text_unique_koef'] = test_df['text_unique_len'] / test_df['text_len']
    text_len_features = train_df[['text_len', 'text_unique_len', 'text_unique_koef']].values[test_idxs]

    x_train_catboost = []
    y_train_catboost = y_test_nn
    for len_f, y_hat_cnn, y_hat_lstm, y_hat_concat, y_hat_tfidf, bow in zip(
            text_len_features, y_cnn, y_lstm, y_concat, np.array(y_tfidf).T, val_bow):
        x_train_catboost.append(np.concatenate([len_f, y_hat_cnn, y_hat_lstm, y_hat_concat, y_hat_tfidf, bow]))

    models_cb = []
    metrics_cb = {}
    x_train_cb, x_val_cb, y_train_cb, y_val_cb = train_test_split(
        x_train_catboost, y_train_catboost, test_size=0.20, random_state=42)
    for i, label in enumerate(target_labels):
        model = CatBoostClassifier(loss_function='Logloss', iterations=1000,
                                   depth=6, rsm=1, learning_rate=0.01)
        model.fit(x_train_cb, y_train_cb[:, i], plot=True,
                  eval_set=(x_val_cb, y_val_cb[:, i]), use_best_model=True)
        y_hat_cb = model.predict_proba(x_val_cb)
        metrics_cb[label] = calc_metrics(y_val_cb[:, i], y_hat_cb[:, 1])
        models_cb.append(model)
        joblib.dump(model, meta_catboost_model_file.format(label))
    metrics_cb['Avg logloss'] = np.mean([metric[0] for label, metric in metrics_cb.items()])
    logger.debug('CatBoost metrics:\n{}'.format(metrics_cb))

    # ====Predict====
    logger.info('Applying models...')
    text_len_features = test_df[['text_len', 'text_unique_len', 'text_unique_koef']].values
    y_cnn_test = test_df[['cnn_{}'.format(label) for label in target_labels]].values
    y_lstm_test = test_df[['lstm_{}'.format(label) for label in target_labels]].values
    y_concat_test = test_df[['concat_{}'.format(label) for label in target_labels]].values
    y_tfidf_test = test_df[['tfidf_{}'.format(label) for label in target_labels]].values
    x_test_cb = []
    for len_f, y_hat_cnn, y_hat_lstm, y_hat_concat, y_hat_tfidf, bow in tqdm(zip(
            text_len_features, y_cnn_test, y_lstm_test, y_concat_test, y_tfidf_test, test_bow)):
        x_test_cb.append(np.concatenate([len_f, y_hat_cnn, y_hat_lstm, y_hat_concat, y_hat_tfidf, bow]))
    for label, model in zip(target_labels, models_cb):
        pred = model.predict_proba(x_test_cb)
        test_df[label] = np.array(list(pred))[:, 1]

    # ====Save results====
    logger.info('Saving results...')
    test_df[['id', 'toxic', 'severe_toxic', 'obscene', 'threat', 'insult',
             'identity_hate']].to_csv(result_fname, index=False, header=True)
    return hit_cnt / cnt


if __name__ == '__main__':
    if mode == 'valid':
        df_click = pd.read_pickle('user_data/data/offline/click.pkl')
        df_query = pd.read_pickle('user_data/data/offline/query.pkl')

        recall_path = 'user_data/data/offline'
    else:
        df_click = pd.read_pickle('user_data/data/online/click.pkl')
        df_query = pd.read_pickle('user_data/data/online/query.pkl')

        recall_path = 'user_data/data/online'

    log.debug(f'max_threads {max_threads}')

    #recall_methods = ['itemcf', 'w2v', 'binetwork']
    recall_methods = ['itemcf', 'w2v']

    #weights = {'itemcf': 1, 'binetwork': 1, 'w2v': 0.1}
    weights = {'itemcf': 1, 'w2v': 0.1}
    recall_list = []
    recall_dict = {}
    for recall_method in recall_methods:
        recall_result = pd.read_pickle(f'{recall_path}/recall_{recall_method}.pkl')
        weight = weights[recall_method]

        # An article recalled by several methods gets a different score from each of them;
        # the score is a strong ranking feature, so the scores have to be merged.
        # For each recall result, min-max normalize the similarity scores per user.
        recall_result['sim_score'] = mms(recall_result)
class TaskManager(object):
    def __init__(self):
        self.name = 'TaskManager'
        self.logger = Logger(self.name)
        self.tasks = []
        self.priorities = []

    def add_task(self, task):
        curr_highest_priority = max(self.priorities) if self.priorities else 0
        if task.priority < curr_highest_priority:
            self.logger.warning(
                (task.name, 'Request refused. Higher priority task is running.'))
            return False
        elif task.priority == curr_highest_priority:
            self.logger.info((
                task.name,
                'Task is created successfully. Same priority task is running. Adding to the task list'
            ))
            self.tasks.append(task)
            self.priorities.append(task.priority)
        elif task.priority > curr_highest_priority:
            if self.tasks or self.priorities:
                self.logger.warning(
                    (task.name, 'is created successfully. Other tasks are discarded.'))
                self.tasks.clear()
                self.priorities.clear()
            self.tasks.append(task)
            self.priorities.append(task.priority)

    def clean_tasks(self):
        """
        :brief: remove timed-out and completed tasks from the task list
        :return:
        """
        for task in list(self.tasks):
            to_remove = False
            if task.state == task.states['end']:
                self.logger.info((task.name, 'is done.'))
                to_remove = True
            if task.is_current_task_timeout():
                self.logger.error('Task ' + task.name + ' timeout...')
                to_remove = True
            if to_remove:
                self.tasks.remove(task)
                self.priorities.remove(task.priority)

    def run_tasks(self):
        self.clean_tasks()
        for task in self.tasks:
            self.logger.debug(('current task: ', task.name, 'current state: ', task.state.name))
            task.run()
                        default=settings.DEFAULT_TRS_PORT,
                        help='Translation Server Port Address.')
    parser.add_argument('-c', dest='tcs_name', type=str,
                        default=settings.DEFAULT_TCS_NAME,
                        help='Translation Contact Server IP Address.')
    parser.add_argument('-e', dest='tcs_port', type=int,
                        default=settings.DEFAULT_TCS_PORT,
                        help='Translation Contact Server Port Address.')
    args = parser.parse_args()
    # validate them

    # print information just to make sure
    log.debug(
        "Using Language = {}, TRS Port = {}, TCS Name = {}, TCS Port = {}.".format(
            args.language, args.trs_port, args.tcs_name, args.tcs_port))

    # for steps 1 and 3 we need a UDP connection
    udp = UDP(args.tcs_name, args.tcs_port)
    # for step 2, we need a TCP connection
    tcp = TCP(args.trs_name, args.trs_port)

    # 1 - register this server into the TCS database
    response = udp.request("SRG {} {} {}\n".format(args.language, args.trs_name, args.trs_port))
    if response == "SRR OK":
        log.info("TCS Server registered TRS Server \"{}\" successfully.".format(args.language))
    elif response == "SRR NOK":
        log.error(
def execute(self):
    msg = "ACK received from host %s, flow %s" % (self.host, self.flow)
    Logger.debug(self.time, msg)
    self.flow.ack_received(self.time)
if __name__ == "__main__": log.info("Starting TCS server...") # format of command is ./tcs [-p TCSport], parser = argparse.ArgumentParser() parser.add_argument('-n', dest='tcs_name', type=str, default=settings.DEFAULT_TCS_NAME, help='Translation Contact Server IP Address.') parser.add_argument('-p', dest='tcs_port', type=int, default=settings.DEFAULT_TCS_PORT, help='Translation Contact Server Port Address.') args = parser.parse_args() # validate them # print information just to make sure log.debug("Using TCS Port = {}.".format(args.tcs_port)) handler = TCSHandler() try: # running server udp = UDP(args.tcs_name, args.tcs_port) udp.run(handler=handler) except KeyboardInterrupt, e: # if CTRL+C is pressed, then go for last step log.info("Exiting TCS Server...") pass finally: # remove db file handler.DB.close()
def main(argv):
    parser = argparse.ArgumentParser(description="Extracts raw data from rosbag files")
    parser.add_argument('rosbag', help='Rosbag file to extract data')
    parser.add_argument('datatype', nargs='+',
                        help='Type of data to be extracted. supported options include [all|info|images|caminfo|gps]')
    parser.add_argument('--encode',
                        help='[raw|jpeg] when provided with datatype=images, this option extracts images in the corresponding format')
    parser.add_argument('--hist', action='store_true',
                        help='when provided with datatype=images, this option generates image histograms')
    parser.add_argument('-o', '--output', help='Dir to dump extracted data')
    parser.add_argument('-v', '--verbose', action='store_true', help='enable verbose outputs')
    args = parser.parse_args()

    # Initialize logger
    Logger.init(level=Logger.LEVEL_INFO, name="extract_data")
    if args.verbose:
        Logger.setLevel(Logger.LEVEL_DEBUG)

    inputfile = args.rosbag

    # set output dir if provided, else extract data into the current dir
    if args.output is not None:
        basepath = args.output
    else:
        basepath = "./"

    # all sensor data is extracted into the "<base_path>/extracted" dir
    outputdir = basepath + "/extracted"
    Logger.debug('processing ' + inputfile)
    Logger.debug('extracting to ' + outputdir)

    # create the output dir for dumping data if it does not exist
    if not os.path.exists(outputdir):
        os.makedirs(outputdir)

    # check if bagfile exists
    if not os.path.exists(inputfile):
        Logger.error("File not found: " + inputfile)
        return

    bag = None
    # extract specified datatypes
    for datatype in args.datatype:
        if datatype == 'videos':
            extract_videos(basepath)
        else:
            # open bagfile
            if bag is None:
                bag = rosbag.Bag(inputfile)
            if datatype == 'images':
                hist = False
                if args.hist:
                    hist = True
                if args.encode is not None and args.encode == "raw":
                    extract_images(bag, outputdir, img_type="raw", hist=hist)
                elif args.encode is not None and args.encode == "jpeg":
                    extract_images(bag, outputdir, img_type="jpeg", hist=hist)
                else:
                    extract_images(bag, outputdir)
            elif datatype == 'caminfo':
                extract_cam_info(bag, outputdir)
            elif datatype == 'imu':
                extract_imu_info(bag, outputdir)
            elif datatype == 'gps':
                extract_gps_info(bag, outputdir)
            elif datatype == 'info':
                extract_bag_info(bag, basepath)
            elif datatype == 'lidar':
                extract_lidar_points(bag, outputdir)
            elif datatype == 'all':
                extract_all(bag, basepath)

    # close bag and exit
    if bag is not None:
        bag.close()
if __name__ == '__main__':
    if mode == 'valid':
        df_click = pd.read_pickle('../user_data/data/offline/click.pkl')
        df_query = pd.read_pickle('../user_data/data/offline/query.pkl')

        os.makedirs('../user_data/sim/offline', exist_ok=True)
        sim_pkl_file = '../user_data/sim/offline/binetwork_sim.pkl'
    else:
        df_click = pd.read_pickle('../user_data/data/online/click.pkl')
        df_query = pd.read_pickle('../user_data/data/online/query.pkl')

        os.makedirs('../user_data/sim/online', exist_ok=True)
        sim_pkl_file = '../user_data/sim/online/binetwork_sim.pkl'

    log.debug(f'df_click shape: {df_click.shape}')
    log.debug(f'{df_click.head()}')

    item_sim, user_item_dict = cal_sim(df_click)
    f = open(sim_pkl_file, 'wb')
    pickle.dump(item_sim, f)
    f.close()

    # recall
    n_split = max_threads
    all_users = df_query['user_id'].unique()
    shuffle(all_users)
    total = len(all_users)
    n_len = total // n_split

    # clear the temporary folder
def execute(self):
    Logger.debug(self.time, "%s: Updating dynamic routing table." % self)
    # Fix dynamic cost of the link before starting routing table creation
    map(lambda l: l.fix_dynamic_cost(self.time), self.router.links)
    self.router.create_routing_table(dynamic=True)
fh = logging.FileHandler(os.path.join(config.LOG_TO, config.LOGGER.get('file')))
fh.setLevel(config.LOGGER.get('level'))
fh.setFormatter(config.LOGGER.get('formatter'))
log = Logger("IBotManager", fh)

modules = get_all_modules()
threads = [Thread(target=InstaBot(**get_config_from_module(module_name[:-3])).new_auto_mod,
                  name=module_name[:-3]) for module_name in modules]

for t in threads:
    try:
        log.debug("Trying to start '%s' bot" % t.name)
        t.start()
        log.debug("Successfully started '%s' bot." % t.name)
    except Exception as ex:
        log.exception("Error while running bot")

[t.join() for t in threads if t.is_alive()]


def stop_all_process():
    signal.alarm(1)


atexit.register(stop_all_process)
time.sleep(5)