def cli_main():
    log_file = 'stdout'  # auto-name the log file
    log_level = ''
    utils.start_logging(log_file=log_file, log_level='INFO', verbose=False)  # start logging
    opts = parse_arguments()
    count_silent_mutations(opts)
def run(args):
    logname = os.path.basename(__file__) + '.log'
    utils.start_logging(filename=logname, level=args.log)
    localtime = utils.getCurrentTimeAsAscii()
    t0 = time.time()
    msg = os.path.basename(__file__) + ' script starts at %s.' % localtime
    print msg
    logging.info(msg)

    connection, cursor = utils.connectToDB(args.dbname, args.dbuser, args.dbpass,
                                           args.dbhost, args.dbport)

    (returnOk, vertices, minZ, maxZ, avgZ, numpoints) = create_cut_out(
        cursor, args.las, args.output, args.itemid, args.buffer, args.concave)

    if returnOk:
        # Create CSV with vertices of footprint
        footoutput = args.output + '_footprint.csv'
        logging.info('Creating CSV %s with vertices of concave hull of footprint' % footoutput)
        fpOutput = open(footoutput, 'w')
        for point in vertices:
            point.append(str(avgZ))
            fpOutput.write(','.join(point) + '\n')
        fpOutput.close()

        logging.info('#Points: %d' % numpoints)

    elapsed_time = time.time() - t0
    msg = 'Finished. Total elapsed time: %.02f seconds. See %s' % (elapsed_time, logname)
    print(msg)
    logging.info(msg)
def run(opts):
    # Start logging
    logname = os.path.splitext(os.path.basename(__file__))[0] + '.log'
    utils.start_logging(filename=logname, level=opts.log)
    localtime = utils.getCurrentTimeAsAscii()
    t0 = time.time()
    msg = os.path.basename(__file__) + ' script starts at %s.' % localtime
    print msg
    logging.info(msg)

    # database connection
    connection, cursor = utils.connectToDB(opts.dbname, opts.dbuser, opts.dbpass,
                                           opts.dbhost, opts.dbport)

    if opts.itemid == '?':
        utils.listRawDataItems(cursor)
        return
    elif opts.itemid == '' or opts.itemid == '!':
        query = """
            SELECT raw_data_item_id, abs_path, background
            FROM RAW_DATA_ITEM JOIN ITEM USING (item_id) JOIN RAW_DATA_ITEM_PC USING (raw_data_item_id)
            WHERE raw_data_item_id NOT IN (SELECT raw_data_item_id FROM POTREE_DATA_ITEM_PC)"""
        # Get the list of items that are not converted yet
        raw_data_items, num_raw_data_items = utils.fetchDataFromDB(cursor, query)
        for (rawDataItemId, absPath, isBackground) in raw_data_items:
            if opts.itemid == '':
                levels = getNumLevels(opts, isBackground)
                createPOTree(cursor, rawDataItemId, opts.potreeDir, levels)
            else:
                m = '\t'.join((str(rawDataItemId), absPath))
                print m
                logging.info(m)
    else:
        for rawDataItemId in opts.itemid.split(','):
            rows, num_rows = utils.fetchDataFromDB(
                cursor,
                'SELECT background FROM RAW_DATA_ITEM JOIN ITEM USING (item_id) WHERE raw_data_item_id = %s',
                [int(rawDataItemId)])
            if num_rows == 0:
                logging.error('There is no raw data item with id %d' % int(rawDataItemId))
                return
            isBackground = rows[0][0]
            levels = getNumLevels(opts, isBackground)
            createPOTree(cursor, int(rawDataItemId), opts.potreeDir, levels)

    # close DB connection
    utils.closeConnectionDB(connection, cursor)

    elapsed_time = time.time() - t0
    msg = 'Finished. Total elapsed time: %.02f seconds. See %s' % (elapsed_time, logname)
    print(msg)
    logging.info(msg)
def run(args):
    logname = os.path.basename(__file__) + '.log'
    utils.start_logging(filename=logname, level=args.log)

    # connect to the DB
    connection, cursor = utils.connectToDB(args.dbname, args.dbuser, args.dbpass,
                                           args.dbhost, args.dbport)

    itemIds = None
    if args.itemid != '':
        itemIds = args.itemid.split(',')
    utils.listRawDataItems(cursor, itemIds)
def run(args):
    # start logging
    logname = os.path.splitext(os.path.basename(__file__))[0] + '.log'
    utils.start_logging(filename=logname, level=utils.DEFAULT_LOG_LEVEL)
    localtime = utils.getCurrentTimeAsAscii()
    t0 = time.time()
    msg = os.path.basename(__file__) + ' script starts at %s.' % localtime
    print msg
    logging.info(msg)

    # connect to the DB
    connection, cursor = utils.connectToDB(args.dbname, args.dbuser, args.dbpass,
                                           args.dbhost, args.dbport)

    itemIds = []
    if args.itemid == '':
        data, num = utils.fetchDataFromDB(cursor, 'SELECT item_id FROM ITEM WHERE NOT background')
        for (itemId,) in data:
            itemIds.append(itemId)
    else:
        itemIds = args.itemid.split(',')

    # close the connection to the DB
    utils.closeConnectionDB(connection, cursor)

    # Create queues
    itemsQueue = multiprocessing.Queue()    # The queue of tasks (queries)
    resultsQueue = multiprocessing.Queue()  # The queue of results

    for itemId in itemIds:
        itemsQueue.put(int(itemId))
    for i in range(args.cores):
        # we add as many None jobs as there are workers to tell them to terminate (queue is FIFO)
        itemsQueue.put(None)

    procs = []
    # Start one worker process per core
    for i in range(args.cores):
        procs.append(multiprocessing.Process(target=runChild,
                     args=(i, itemsQueue, resultsQueue, args.las, args.dbname,
                           args.dbuser, args.dbpass, args.dbhost, args.dbport)))
        procs[-1].start()

    for i in range(len(itemIds)):
        [procIndex, itemId] = resultsQueue.get()

    # wait for all the workers to finish their execution
    for i in range(args.cores):
        procs[i].join()

    # measure elapsed time
    elapsed_time = time.time() - t0
    msg = 'Finished. Total elapsed time: %.02f seconds. See %s' % (elapsed_time, logname)
    print(msg)
    logging.info(msg)
def run(opts):
    # Start logging
    #logname = os.path.basename(__file__) + '.log'
    logname = os.path.splitext(os.path.basename(__file__))[0] + '.log'
    utils.start_logging(filename=logname, level=opts.log)
    localtime = utils.getCurrentTimeAsAscii()
    t0 = time.time()
    msg = os.path.basename(__file__) + ' script starts at %s.' % localtime
    print msg
    logging.info(msg)

    # database connection
    connection, cursor = utils.connectToDB(opts.dbname, opts.dbuser, opts.dbpass,
                                           opts.dbhost, opts.dbport)

    if opts.itemid == '?':
        utils.listRawDataItems(cursor)
        return
    elif opts.itemid == '' or opts.itemid == '!':
        query = """
            SELECT raw_data_item_id, abs_path
            FROM RAW_DATA_ITEM JOIN ITEM USING (item_id)
            WHERE NOT background AND raw_data_item_id NOT IN (
                SELECT raw_data_item_id FROM OSG_DATA_ITEM_PC_SITE
                UNION
                SELECT raw_data_item_id FROM OSG_DATA_ITEM_MESH
                UNION
                SELECT raw_data_item_id FROM OSG_DATA_ITEM_PICTURE)"""
        # Get the list of items that are not converted yet
        raw_data_items, num_raw_data_items = utils.fetchDataFromDB(cursor, query)
        for (rawDataItemId, absPath) in raw_data_items:
            if opts.itemid == '':
                createOSG(cursor, rawDataItemId, opts.osgDir)
            else:
                m = '\t'.join((str(rawDataItemId), absPath))
                print m
                logging.info(m)
    else:
        for rawDataItemId in opts.itemid.split(','):
            createOSG(cursor, int(rawDataItemId), opts.osgDir)

    # close DB connection
    utils.closeConnectionDB(connection, cursor)

    elapsed_time = time.time() - t0
    msg = 'Finished. Total elapsed time: %.02f seconds. See %s' % (elapsed_time, logname)
    print(msg)
    logging.info(msg)
def main():
    parser = argparse.ArgumentParser(description='Migrate from Melange to Quark.')  # noqa
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Log to stdout and to file.', dest='verbose')
    parser.add_argument('-c', '--clear', action='store_true', default=False,
                        help='Clear logs before running.', dest='clearlogs')
    arguments = parser.parse_args()
    start_logging(verbose=arguments.verbose)
    if arguments.clearlogs:
        clear_logs()
    melange_session = loadSession(melange.engine)
    neutron_session = loadSession(neutron.engine)
    migration = Obligator(melange_session, neutron_session)
    migration.migrate()
def run(opts):
    # set logging level
    global logger
    #logname = os.path.basename(__file__) + '.log'
    logname = os.path.splitext(os.path.basename(__file__))[0] + '.log'
    logger = utils.start_logging(filename=logname, level=opts.log)
    localtime = utils.getCurrentTimeAsAscii()
    t0 = time.time()
    msg = os.path.basename(__file__) + ' script starts at %s.' % localtime
    print msg
    logger.info(msg)

    opts.file = opts.file.rstrip('/')

    # check if all required options are specified
    check_required_options(opts)
    # check if the required directory structure exists
    check_directory_structure(opts.data)
    # check input data
    check_input_data(opts)
    # define target directory
    TARGETDIR = define_create_target_dir(opts)
    # copy the data to the target directory
    copy_data(opts, TARGETDIR)

    elapsed_time = time.time() - t0
    msg = 'Finished. Total elapsed time: %.02f seconds. See %s' % (elapsed_time, logname)
    print(msg)
    logger.info(msg)
def run(opts):
    # Set logging
    #logname = os.path.splitext(os.path.basename(opts.sql))[0] + '.log'
    logname = os.path.basename(opts.sql) + '.log'
    utils.start_logging(filename=logname, level=opts.log)
    localtime = utils.getCurrentTimeAsAscii()
    t0 = time.time()
    msg = os.path.basename(__file__) + ' script starts at %s.' % localtime
    print msg
    logging.info(msg)

    os.system('createdb ' + utils.postgresConnectString(opts.dbname, opts.dbuser, opts.dbpass,
                                                        opts.dbhost, opts.dbport, True))

    connection, cursor = utils.connectToDB(opts.dbname, opts.dbuser, opts.dbpass,
                                           opts.dbhost, opts.dbport)

    msg = 'Adding PostGIS extension'
    logging.info(msg)
    #print msg
    cursor.execute("CREATE EXTENSION POSTGIS")
    connection.commit()

    success_loading = utils.load_sql_file(cursor, opts.sql)

    msg = 'Granting relevant permissions'
    logging.info(msg)
    #print msg
    if success_loading:
        cursor.execute("select tablename from pg_tables where schemaname = 'public'")
        tablesNames = cursor.fetchall()
        for (tableName,) in tablesNames:
            cursor.execute('GRANT SELECT ON ' + tableName + ' TO public')

        for tableName in ('ITEM', 'ITEM_OBJECT', 'OSG_LOCATION', 'OSG_LABEL',
                          'OSG_CAMERA', 'OSG_ITEM_CAMERA', 'OSG_ITEM_OBJECT'):
            cursor.execute('GRANT SELECT,INSERT,UPDATE,DELETE ON ' + tableName + ' TO public')

        connection.commit()

    connection.close()

    msg = 'Finished. Total elapsed time %.02f seconds. See %s' % ((time.time() - t0), logname)
    logging.info(msg)
    print msg
def __init__(self):
    results = self._cli_parser()
    global logger
    logger = utils.start_logging(os.path.join(os.path.expanduser("~"), 'wrfpy.log'))
    if results['init']:
        self._create_directory_structure(results['suitename'], results['basedir'])
    elif results['create']:
        self._create_cylc_config(results['suitename'], results['basedir'])
def main(argv=None):
    if not argv:
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Run bot to maintain issues.')
    parser.add_argument('--config', action="store", help='Configuration file')
    parser.add_argument('--logging-config', action="store", help='Logging configuration file')
    parser.add_argument('--user', action="store", help='Github user')
    parser.add_argument('--token', action="store", help='Github token')
    parser.add_argument('--repo', action="store", help='Repository')
    parser.add_argument('--label', action="store", help='Add this label to issues when closing')
    parser.add_argument('--test', action="store_true",
                        help='Print actions that would be taken but do not modify repository')
    args = parser.parse_args(argv)

    config = get_config(args.config)
    if args.user:
        config["user"] = args.user
    if args.token:
        config["token"] = args.token
    if args.repo:
        config["repo"] = args.repo
    if args.logging_config:
        config["logging-config"] = args.logging_config
    if args.test:
        config["test"] = True
    if args.label:
        config["label"] = args.label
    if "label" not in config:
        config["label"] = None

    start_logging(config)
    process_issues(config)
def __init__(self, input_dir, dest_dir):
    global logger
    logger = utils.start_logging('translate.log')  # create log file
    self.input_dir = input_dir
    self.dest_dir = dest_dir
    self.database = 'translate.db'  # hardcode database name for now
    try:
        with open(self.database) as file:
            pass  # file exists and is readable, nothing else to do
    except IOError as e:
        # file does not exist OR no read permissions
        # create database if not existing
        self._create_database()
    # read yandex api key from file
    self._read_yandex_api_key('yandex_key')  # TODO: hardcode for now
    self._yandex_connect(self.api_key)
def main():
    pymysql.install_as_MySQLdb()
    logger, start_time = start_logging('update_db_from_jira')
    try:
        mode = "prod"
        Session, engine = create_db_session(mode)
        session = Session()
        jira_data_by_pr = JiraDataByProject(logger, session)
        project_data_mp = jira_data_by_pr.get_web_mp_data("DBIOSCA")
        project_data_web = jira_data_by_pr.get_web_mp_data("DBSBOLW")
        project_data_ufs = jira_data_by_pr.get_ufs_data("LINEUP")
        release_db = UpdateReleaseDB(logger, session)
        release_db.update_mp_web_db(project_data_mp)
        release_db.update_mp_web_db(project_data_web)
        release_db.update_ufs_db(project_data_ufs)
    except Exception as ex:
        print(ex)
    finally:
        update_time = datetime.datetime.now().replace(microsecond=0) - start_time
        logger.info(f"End of DB updating. Update time: {str(update_time)}")
        session.close()
def __init__(self, wrfpy_config=False):
    global logger
    wrfpy_dir = os.environ['HOME']
    logger = utils.start_logging(os.path.join(wrfpy_dir, 'wrfpy.log'))
    if not wrfpy_config:
        try:
            # get CYLC_SUITE_DEF_PATH environment variable
            wrfpy_dir = os.environ['CYLC_SUITE_DEF_PATH']
        except KeyError:
            # default back to user home dir in case CYLC is not used
            wrfpy_dir = os.environ['HOME']
        # config.json needs to be in base of wrfpy_dir
        self.configfile = os.path.join(wrfpy_dir, 'config.json')
    else:
        self.configfile = wrfpy_config
    try:
        logger.debug('Checking if configuration file exists: %s' % self.configfile)
        utils.check_file_exists(self.configfile)
    except IOError:
        # create config file
        self._create_empty_config()
        # TODO: exit and notify user to manually edit config file
    # read json config file
    self._read_json()
def run(args):
    # set logging level
    global logger
    global connection
    global cursor
    logname = os.path.basename(__file__) + '.log'
    logger = utils.start_logging(filename=logname, level=args.log)
    localtime = utils.getCurrentTimeAsAscii()
    msg = os.path.basename(__file__) + ' script starts at %s.' % localtime
    print msg
    logger.info(msg)

    # start timer
    t0 = time.time()

    # connect to the DB
    connection, cursor = utils.connectToDB(args.dbname, args.dbuser, args.dbpass,
                                           args.dbhost, args.dbport)

    if args.itemid == '?':
        utils.listRawDataItems(cursor)
        return
    else:
        for rawDataItemId in args.itemid.split(','):
            # fetch the abs_path
            abs_paths = fetch_abs_path(rawDataItemId)
            msg = 'Abs path fetched: %s' % abs_paths
            print msg
            logger.info(msg)

            # fetch the potree abs_paths
            abs_potree_paths, num_potree = fetch_potree_abs_paths(rawDataItemId)
            msg = '%s abs potree paths fetched %s' % (num_potree, abs_potree_paths)
            print msg
            logger.info(msg)

            # fetch the nexus abs_paths
            abs_nexus_paths, num_nexus = fetch_nexus_abs_paths(rawDataItemId)
            msg = '%s abs nexus paths fetched %s' % (num_nexus, abs_nexus_paths)
            print msg
            logger.info(msg)

            # fetch the OSG abs_paths PC
            abs_osg_pc_paths, num_osg_pc = fetch_osg_abs_paths_pc(rawDataItemId)
            msg = '%s abs OSG paths for PC fetched: %s' % (num_osg_pc, abs_osg_pc_paths)
            print msg
            logger.info(msg)

            # fetch the OSG abs_paths mesh
            abs_osg_mesh_paths, num_osg_mesh = fetch_osg_abs_paths_mesh(rawDataItemId)
            msg = '%s abs OSG paths for meshes fetched: %s' % (num_osg_mesh, abs_osg_mesh_paths)
            print msg
            logger.info(msg)

            # fetch the OSG abs_paths picture
            abs_osg_picture_paths, num_osg_picture = fetch_osg_abs_paths_picture(rawDataItemId)
            msg = '%s abs OSG paths for pictures fetched: %s' % (num_osg_picture, abs_osg_picture_paths)
            print msg
            logger.info(msg)

            # fetch the OSG abs_paths PC BG
            abs_osg_pc_bg_paths, num_osg_pc_bg = fetch_osg_abs_paths_pc_bg(rawDataItemId)
            msg = '%s abs OSG paths for PC BG fetched: %s' % (num_osg_pc_bg, abs_osg_pc_bg_paths)
            print msg
            logger.info(msg)

            # remove the files related to the above absolute paths
            for abs_paths_to_remove in (abs_paths, abs_potree_paths, abs_nexus_paths,
                                        abs_osg_pc_paths, abs_osg_mesh_paths,
                                        abs_osg_picture_paths, abs_osg_pc_bg_paths):
                remove_data(abs_paths_to_remove)

            msg = 'Removed data locations related to raw data item %s (%s)!' % (rawDataItemId, abs_paths[0])
            print msg
            logger.info(msg)

    # measure elapsed time
    elapsed_time = time.time() - t0
    msg = 'Finished. Total elapsed time: %.02f seconds. See %s' % (elapsed_time, logname)
    print(msg)
    logger.info(msg)
def run(opts):
    global logger
    # Define logger and start logging
    #logname = os.path.basename(opts.output) + '.log'
    logname = os.path.splitext(os.path.basename(__file__))[0] + '.log'
    logger = utils.start_logging(filename=logname, level=opts.log)
    localtime = utils.getCurrentTimeAsAscii()
    t0 = time.time()
    msg = os.path.basename(__file__) + ' script starts at %s.' % localtime
    print msg
    logger.info(msg)

    if not opts.output.endswith(".conf.xml"):
        logger.error('The output file must end with .conf.xml')
        raise IOError('The output file must end with .conf.xml')

    # Create python postgres connection
    global cursor
    connection, cursor = utils.connectToDB(opts.dbname, opts.dbuser, opts.dbpass,
                                           opts.dbhost, opts.dbport)

    # Check that provided background is in DB
    query = """
        SELECT OSG_DATA_ITEM_PC_BACKGROUND.abs_path, srid
        FROM OSG_DATA_ITEM_PC_BACKGROUND JOIN RAW_DATA_ITEM USING (raw_data_item_id)"""
    rows, num_rows = utils.fetchDataFromDB(cursor, query)
    backGroundAbsPath = None
    backgroundSRID = None
    for (bgAbsPath, bgSRID) in rows:
        if opts.background == os.path.basename(bgAbsPath):
            backGroundAbsPath = bgAbsPath
            backgroundSRID = bgSRID
    if backGroundAbsPath is None:
        errorMsg = 'Background ' + opts.background + ' is not found'
        logger.error(errorMsg)
        raise Exception(errorMsg)

    # Get the root object: the OSG configuration
    rootObject = viewer_conf_api.osgRCconfiguration()
    # set version
    rootObject.set_version("0.2")

    # Add all the different XML of the active objects
    # (we add distinct since the boundings will share XMLs)
    query = """
        SELECT DISTINCT xml_abs_path
        FROM OSG_DATA_ITEM ORDER BY xml_abs_path"""
    rows, num_rows = utils.fetchDataFromDB(cursor, query)
    for (xmlPath,) in rows:
        if xmlPath.count(opts.osg) == 0:
            logger.error('Mismatch between given OSG data directory and DB content')
        rootObject.add_objectLibrary(viewer_conf_api.objectLibrary(
            url=os.path.relpath(xmlPath, opts.osg)))

    # Add the object library with the boundings
    rootObject.add_objectLibrary(viewer_conf_api.objectLibrary(
        url=utils.BOUNDINGS_XML_RELATIVE))

    # Add the cameras that are in the DB
    cameras = viewer_conf_api.cameras()
    query = """
        SELECT osg_camera_name, srid, x, y, z, h, p, r
        FROM OSG_CAMERA JOIN OSG_LOCATION USING (osg_location_id)"""
    rows, num_rows = utils.fetchDataFromDB(cursor, query)
    for (name, srid, x, y, z, h, p, r) in rows:
        if (srid is not None) and (srid == backgroundSRID):
            x, y, z = getOSGPosition(x, y, z, srid)
        else:
            x, y, z = getOSGPosition(x, y, z)
        cameras.add_camera(viewer_conf_api.camera(name=name, x=x, y=y, z=z, h=h, p=p, r=r))

    # Add Default cameras for the items that have no camera in the DB
    query = """
        SELECT item_id, ST_SRID(geom),
               st_x(st_centroid(geom)), st_y(st_centroid(geom)),
               min_z + ((max_z - min_z) / 2)
        FROM ITEM
        WHERE NOT background AND geom IS NOT null AND item_id NOT IN (
            SELECT DISTINCT item_id FROM OSG_ITEM_CAMERA)
        ORDER BY item_id"""
    rows, numitems = utils.fetchDataFromDB(cursor, query)
    for (itemId, srid, x, y, z) in rows:
        # only call getOSGPosition if [x,y,z] are not None
        # should item_id = -1 be added?
        if all(position is not None for position in [x, y, z]) and itemId > 0:
            if (srid is not None) and (srid == backgroundSRID):
                x, y, z = getOSGPosition(x, y, z, srid)
            else:
                x, y, z = getOSGPosition(x, y, z)
            cameras.add_camera(viewer_conf_api.camera(
                name=utils.DEFAULT_CAMERA_PREFIX + str(itemId), x=x, y=y, z=z))
    rootObject.set_cameras(cameras)

    # Add the XML content of the preferences
    rootObject.set_preferences(viewer_conf_api.parseString(DEFAULT_PREFENCES))

    attributes = viewer_conf_api.attributes()
    # Use generic method to fill all properties.
    # We need the name in the XML, the column name in the DB and
    # the table name in the DB
    for property in utils.ATTRIBUTES_ORDER:
        (cName, tName) = utils.ATTRIBUTES[property]
        elements = getattr(viewer_conf_api, property + 's')()
        # We need to call the columns and tables with extra "" because
        # they were created from the Access DB
        # utils.dbExecute(cursor, 'SELECT "' + cName + '" FROM "' + tName + '"')
        utils.dbExecute(cursor, 'SELECT ' + cName + ' FROM ' + tName)
        for (element,) in cursor:
            getattr(elements, 'add_' + property)(getattr(viewer_conf_api, property)(name=element))
        getattr(attributes, 'set_' + property + 's')(elements)
    rootObject.set_attributes(attributes)

    # Add all the static objects, i.e. the OSG from the background
    # Add the static object for the background
    staticObjects = viewer_conf_api.staticObjects()
    staticObjects.add_staticObject(viewer_conf_api.staticObject(
        url=os.path.relpath(
            glob.glob(backGroundAbsPath + '/' + utils.OSG_DATA_PREFIX + '.osgb')[0],
            opts.osg)))
    # Add hardcoded DOME
    staticObjects.add_staticObject(viewer_conf_api.staticObject(
        url=utils.DOMES_OSG_RELATIVE))
    rootObject.set_staticObjects(staticObjects)

    # Add the 5 different layers of active objects
    activeObjects = viewer_conf_api.activeObjects()
    # First we add points, meshes and pictures which are related to
    # the active_objects_sites
    layersData = [('points', 'OSG_DATA_ITEM_PC_SITE', utils.AO_TYPE_PC),
                  ('photos', 'OSG_DATA_ITEM_PICTURE', utils.AO_TYPE_PIC),
                  ('meshes', 'OSG_DATA_ITEM_MESH', utils.AO_TYPE_MESH)]
    for (layerName, tableName, inType) in layersData:
        layer = viewer_conf_api.layer(name=layerName)
        query = """
            SELECT item_id, raw_data_item_id, OSG_LOCATION.srid,
                   x, y, z, xs, ys, zs, h, p, r, cast_shadow
            FROM """ + tableName + """
            JOIN OSG_DATA_ITEM USING (osg_data_item_id)
            JOIN OSG_LOCATION USING (osg_location_id)
            JOIN RAW_DATA_ITEM USING (raw_data_item_id)
            ORDER BY item_id"""
        rows, numitems = utils.fetchDataFromDB(cursor, query)
        for (itemId, rawDataItemId, srid, x, y, z, xs, ys, zs, h, p, r, castShadow) in rows:
            # only call getOSGPosition if [x,y,z] are not None
            if all(position is not None for position in [x, y, z]):
                if (srid is not None) and (srid == backgroundSRID):
                    x, y, z = getOSGPosition(x, y, z, srid)
                else:
                    x, y, z = getOSGPosition(x, y, z)
            uniqueName = utils.codeOSGActiveObjectUniqueName(cursor, inType, rawDataItemId)
            activeObject = viewer_conf_api.activeObject(prototype=uniqueName,
                                                        uniqueName=uniqueName)
            setting = viewer_conf_api.setting(
                x=x, y=y, z=z, xs=xs, ys=ys, zs=zs, h=h, p=p, r=r,
                castShadow=(1 if castShadow else 0))
            activeObject.set_setting(setting)
            layer.add_activeObject(activeObject)
        activeObjects.add_layer(layer)

    # Add the boundings
    layer = viewer_conf_api.layer(name='boundings')
    # We first add the boundings that are currently in the DB
    query = """
        SELECT item_id, object_number, x, y, z, xs, ys, zs, h, p, r,
               OSG_LOCATION.cast_shadow, srid
        FROM OSG_ITEM_OBJECT JOIN OSG_LOCATION USING (osg_location_id)
        ORDER BY item_id,object_number"""
    osgItemObjects, numOsgItemObjects = utils.fetchDataFromDB(cursor, query)
    # osgItemObjects is (itemId, objectNumber, x, y, z, xs, ys, zs, h, p, r, castShadow, srid)
    # Now we add Default OSG data items for the objects that are not in OSG_ITEM_OBJECT table
    query = """
        SELECT item_id,object_number FROM item_object
        WHERE (item_id,object_number) NOT IN (SELECT item_id,object_number FROM OSG_ITEM_OBJECT)
        ORDER BY item_id,object_number"""
    objects, num_objects = utils.fetchDataFromDB(cursor, query)
    for (itemId, objectNumber) in objects:
        srid = None
        (x, y, z) = (0, 0, 0)
        (xs, ys, zs) = (1, 1, 1)
        query = """
            SELECT ST_SRID(geom),
                   st_x(st_centroid(geom)), st_y(st_centroid(geom)),
                   min_z + ((max_z - min_z) / 2),
                   st_xmax(geom)-st_xmin(geom) as dx,
                   st_ymax(geom)-st_ymin(geom) as dy,
                   (max_z - min_z) as dz
            FROM ITEM
            WHERE item_id = %s and geom is not %s"""
        queryArgs = [itemId, None]
        footprints, num_footprints = utils.fetchDataFromDB(cursor, query, queryArgs)
        if num_footprints:
            (srid, x, y, z, xs, ys, zs) = footprints[0]
            if xs == 0:
                xs = 1
            if ys == 0:
                ys = 1
            if zs == 0:
                zs = 1
        osgItemObjects.append([itemId, objectNumber, x, y, z, xs, ys, zs, 0, 0, 0, False, srid])
    # Now let's add them to the XML
    for (itemId, objectNumber, x, y, z, xs, ys, zs, h, p, r, castShadow, srid) in osgItemObjects:
        # only call getOSGPosition if [x,y,z] are not None
        if all(position is not None for position in [x, y, z]) and itemId > 0:
            if (srid is not None) and (srid == backgroundSRID):
                x, y, z = getOSGPosition(x, y, z, srid)
            else:
                x, y, z = getOSGPosition(x, y, z)
        uniqueName = utils.codeOSGActiveObjectUniqueName(cursor, utils.AO_TYPE_OBJ,
                                                         itemId=itemId, objectId=objectNumber)
        proto = "Bounding Box"
        activeObject = viewer_conf_api.activeObject(prototype=proto, uniqueName=uniqueName)
        setting = viewer_conf_api.setting(
            x=x, y=y, z=z, xs=xs, ys=ys, zs=zs, h=h, p=p, r=r,
            castShadow=(1 if castShadow else 0))
        activeObject.set_setting(setting)
        layer.add_activeObject(activeObject)
    activeObjects.add_layer(layer)

    # Add the labels
    layer = viewer_conf_api.layer(name='labels')
    utils.dbExecute(cursor, 'SELECT osg_label_name, text, red, green, blue, ' +
                    'rotate_screen, outline, font, srid, x, y, z, xs, ys, zs, h, ' +
                    'p, r, cast_shadow FROM OSG_LABEL INNER JOIN ' +
                    'OSG_LOCATION ON OSG_LABEL.osg_location_id=' +
                    'OSG_LOCATION.osg_location_id')
    rows = cursor.fetchall()
    for (name, text, red, green, blue, rotatescreen, outline, font, srid,
         x, y, z, xs, ys, zs, h, p, r, castShadow) in rows:
        proto = "labelPrototype"
        uniqueName = utils.codeOSGActiveObjectUniqueName(cursor, utils.AO_TYPE_LAB,
                                                         labelName=name)
        if (srid is not None) and (srid == backgroundSRID):
            x, y, z = getOSGPosition(x, y, z, srid)
        else:
            x, y, z = getOSGPosition(x, y, z)
        activeObject = viewer_conf_api.activeObject(
            prototype=proto, uniqueName=uniqueName, labelText=text,
            labelColorRed=red, labelColorGreen=green, labelColorBlue=blue,
            labelRotateScreen=rotatescreen, outline=outline, Font=font)
        setting = viewer_conf_api.setting(
            x=x, y=y, z=z, xs=xs, ys=ys, zs=zs, h=h, p=p, r=r,
            castShadow=(1 if castShadow else 0))
        activeObject.set_setting(setting)
        layer.add_activeObject(activeObject)
    activeObjects.add_layer(layer)
    rootObject.set_activeObjects(activeObjects)

    # Create the XML
    rootObject.export(open(opts.output, 'w'), 0)

    elapsed_time = time.time() - t0
    msg = 'Finished. Total elapsed time: %.02f seconds. See %s' % (elapsed_time, logname)
    print(msg)
    logger.info(msg)
def run(args):
    global logger
    global offsetX
    global offsetY
    global offsetZ

    logname = os.path.basename(args.output) + '.log'
    logger = utils.start_logging(filename=logname, level=args.log)

    # start logging
    localtime = utils.getCurrentTimeAsAscii()
    msg = __file__ + ' script logging start at %s' % localtime
    print msg
    logger.info(msg)
    t0 = time.time()

    # connect to DB and get a cursor
    connection, cursor = utils.connectToDB(args.dbname, args.dbuser, args.dbpass, args.dbhost)

    # We assume the osg location is relative
    # We need to make it absolute by adding the offset of the background with srid as provided
    query = """
        SELECT C.offset_x, C.offset_y, C.offset_z
        FROM raw_data_item A, raw_data_item_pc B, osg_data_item_pc_background C
        WHERE A.raw_data_item_id = B.raw_data_item_id AND
              B.raw_data_item_id = C.raw_data_item_id AND
              A.srid = %s"""
    queryArgs = [args.srid, ]
    backgroundOffsets, num_backgrounds = utils.fetchDataFromDB(cursor, query, queryArgs)
    if num_backgrounds:
        (offsetX, offsetY, offsetZ) = backgroundOffsets[0]

    # get all items
    query = 'SELECT item_id, ST_ASGEOJSON(geom), min_z, max_z FROM item WHERE NOT background ORDER BY item_id'
    sites, num_sites = utils.fetchDataFromDB(cursor, query)

    data = []
    for (itemId, itemGeom, minz, maxz) in sites:
        # Generate the JSON data for this item
        dataSite = {}
        dataSite["id"] = itemId
        if itemGeom != None:
            dataSite["footprint"] = json.loads(itemGeom)['coordinates']
            dataSite["footprint_altitude"] = [minz, maxz]

        addThumbnail(cursor, itemId, dataSite)
        addSiteMetaData(cursor, itemId, dataSite)
        addPointCloud(cursor, itemId, dataSite, args.srid)
        addMeshes(cursor, itemId, dataSite, args.srid)
        addObjectsMetaData(cursor, itemId, dataSite, args.srid)

        data.append(dataSite)

    # close the DB connection
    utils.closeConnectionDB(connection, cursor)

    # save the data into JSON file
    save2JSON(args.output, data)

    elapsed_time = time.time() - t0
    msg = 'Finished. Total elapsed time: %.02f seconds. See %s' % (elapsed_time, logname)
    print(msg)
    logger.info(msg)
self.cursor.execute("SELECT timestep FROM steps WHERE pass=(?)", (False,)) # first timestep that didn't finish yet date = self.cursor.fetchone()[0] # assume all tasks are completed successfully: update all tasks self.cursor.execute("UPDATE tasks SET get_boundary=(?) WHERE timestep=(?)", (True, date)) self.cursor.execute("UPDATE tasks SET wps=(?) WHERE timestep=(?)", (True, date)) self.cursor.execute("UPDATE tasks SET wrf=(?) WHERE timestep=(?)", (True, date)) # get all tasks self.cursor.execute("SELECT * FROM tasks WHERE timestep=(?)", (date,)) tasks = self.cursor.fetchall() # verify all tasks were completed successfully: set timestep to pass if all(item is True for item in tasks[0][1:]): # all subtasks completed self.cursor.execute("UPDATE steps SET pass=(?) WHERE timestep=(?)", (True, date)) # commit changes self.connection.commit() if __name__ == "__main__": global logger logger = utils.start_logging("test.log") db = database() db._new_database() # db._connect_to_database() start_date = datetime.datetime(2014, 07, 16, 0) end_date = datetime.datetime(2014, 07, 20, 0) db.create_list_datetimes(start_date, end_date, nhours=1) db.testcode() # db._add_timesteps_to_db() db._close_connection() exit()
def run(args):
    logname = os.path.basename(args.input) + '.log'
    utils.start_logging(filename=logname, level=args.log)
    localtime = utils.getCurrentTimeAsAscii()
    t0 = time.time()
    msg = os.path.basename(__file__) + ' script starts at %s.' % localtime
    print msg
    logging.info(msg)

    logging.info('Checking validity of SQL file')
    # Check that the beginning of the file does not contain a CREATE DATABASE statement
    if os.popen('head -500 ' + args.input + ' | grep "CREATE DATABASE"').read().count("CREATE DATABASE"):
        msg = "You must remove CREATE DATABASE statement from the SQL file"
        print msg
        logging.error(msg)
        return
    # Check that there are no DEFAULTs in TIMESTAMP columns that would cause errors
    if os.popen('grep "TIMESTAMP DEFAULT" ' + args.input).read().count("TIMESTAMP DEFAULT"):
        msg = "You must remove any DEFAULT value of any TIMESTAMP column"
        print msg
        logging.error(msg)
        return
    # Check that there are no index creations
    if os.popen('grep "INDEX" ' + args.input).read().count("INDEX"):
        msg = "You must remove any INDEX creation"
        print msg
        logging.error(msg)
        return
    if os.popen("""grep '"' """ + args.input).read().count('"'):
        msg = 'You must remove any double quote (")'
        print msg
        logging.error(msg)
        dangerousWords = []
        for line in open(args.input, 'r').read().split('\n'):
            if not line.startswith('--'):
                for word in line.split():
                    if word.count('"') == 1:
                        dangerousWords.append(word)
        if len(dangerousWords):
            msg = 'Also, before removing all ", take care of table and column names that would be incorrect when removing ".\n If any of the following is a table or column name please be sure that it does not have white spaces: ' + ','.join(dangerousWords)
            print msg
            logging.error(msg)
            return
        return

    # Establish connection with DB
    connection, cursor = utils.connectToDB(args.dbname, args.dbuser, args.dbpass,
                                           args.dbhost, args.dbport)

    # First we drop all previous attribute tables
    logging.info("Dropping all previous attribute tables")
    for tablename in ('tbl2_site_relation', 'tbl2_object_depression',
                      'tbl2_object_decoration', 'tbl2_object_material',
                      'tbl1_object', 'tbl1_site'):
        cursor.execute('DROP TABLE IF EXISTS ' + tablename + ' CASCADE')
    connection.commit()

    # First we need to drop the previous constraints in tbl1_site and tbl1_object
    # logging.info("Dropping constraints in tbl1_site and tbl1_object tables")
    # for tablename in ('tbl1_site','tbl1_object'):
    #     cursor.execute("select constraint_name from information_schema.table_constraints where table_name=%s", [tablename,])
    #     constraintNames = cursor.fetchall()
    #     for (constraintName, ) in constraintNames:
    #         cursor.execute('ALTER TABLE ' + tablename + ' DROP CONSTRAINT %s CASCADE', [constraintName,])
    # connection.commit()

    # This script will drop all attribute tables and create them again
    logging.info('Executing SQL file %s' % args.input)
    #utils.load_sql_file(cursor, args.input)
    connParams = utils.postgresConnectString(args.dbname, args.dbuser, args.dbpass,
                                             args.dbhost, args.dbport, True)
    logFile = os.path.basename(args.input) + '.log'
    command = 'psql ' + connParams + ' -f ' + args.input + ' &> ' + logFile
    logging.info(command)
    os.system(command)

    # Check errors
    if os.popen('cat ' + logFile + ' | grep ERROR').read().count("ERROR"):
        msg = 'There were some errors in the data loading. Please see log ' + logFile
        print msg
        logging.error(msg)
        return

    # Set select permissions to all new tables
    logging.info('Granting select permissions to all tables')
    cursor.execute("select tablename from pg_tables where schemaname = 'public'")
    tablesNames = cursor.fetchall()
    for (tableName, ) in tablesNames:
        cursor.execute('GRANT SELECT ON ' + tableName + ' TO public')

    # We check that the added Sites and Objects are also in the Data Management part of the DB
    # All sites in tbl1_site must have an entry in ITEM
    logging.info('Adding items in attribute data that are missing in ITEM table')
    query = 'SELECT site_id from tbl1_site WHERE site_id NOT IN (SELECT item_id FROM item)'
    sites, num_sites = utils.fetchDataFromDB(cursor, query)
    for (siteId, ) in sites:
        utils.dbExecute(cursor, "INSERT INTO ITEM (item_id, background) VALUES (%s,%s)",
                        [siteId, False])
        utils.dbExecute(cursor, "INSERT INTO ITEM_OBJECT (item_id, object_number) VALUES (%s,%s)",
                        [siteId, utils.ITEM_OBJECT_NUMBER_ITEM])

    # All objects in tbl1_object must also be in ITEM_OBJECT
    logging.info('Adding items objects in attribute data that are missing in ITEM_OBJECT table')
    query = 'SELECT site_id,object_id from tbl1_object WHERE (site_id,object_id) NOT IN (SELECT item_id,object_number FROM item_object)'
    sites_objects, num_sites_objects = utils.fetchDataFromDB(cursor, query)
    for (siteId, objectId) in sites_objects:
        utils.dbExecute(cursor, "INSERT INTO ITEM_OBJECT (item_id, object_number) VALUES (%s,%s)",
                        [siteId, objectId])

    # We add again the constraints that link management and attribute data
    logging.info('Adding constraints between attribute and items')
    cursor.execute("""ALTER TABLE tbl1_object
        ADD FOREIGN KEY (site_id, object_id)
        REFERENCES ITEM_OBJECT (item_id, object_number)
        ON UPDATE NO ACTION ON DELETE NO ACTION""")
    connection.commit()
    cursor.execute("""ALTER TABLE tbl1_site
        ADD FOREIGN KEY (site_id)
        REFERENCES ITEM (item_id)
        ON UPDATE NO ACTION ON DELETE NO ACTION""")
    connection.commit()

    elapsed_time = time.time() - t0
    msg = 'Finished. Total elapsed time: %.02f seconds. See %s' % (elapsed_time, logname)
    print(msg)
    logging.info(msg)
    logger.info('creating {F} gene information ...'.format(F=F))
    gencode_codon_list(F, 'db/cancergene_pos_list.txt')
    hin.close()

    df = pd.read_table("db/cancergene_pos_list.txt", sep='\s+', header=None)
    incomp_IDs = df[(df[4].astype(str).str.len() == 1) | (df[4].astype(str).str.len() == 2)][1].unique()
    df = df[~df[1].isin(incomp_IDs)]
    del df[1]
    df = df.drop_duplicates()
    df.to_csv('db/cancergene_pos_list_filtered.txt', sep=' ', header=False, index=False)

    # sort
    hout = open('db/cancergene_pos_list_final.txt', 'w')
    s_ret = subprocess.call(["sort", "-k1", 'db/cancergene_pos_list_filtered.txt'], stdout=hout)
    if s_ret != 0:
        print >> sys.stderr, "Error in sorting merged junction file"
        sys.exit(1)
    hin.close()
    hout.close()


if __name__ == "__main__":
    log_file = 'stdout'  # auto-name the log file
    log_level = ''
    utils.start_logging(log_file=log_file, log_level='INFO', verbose=False)  # start logging
    main()
import config
import utils
import time
import json
import logging
import pandas as pd
import textrazor

TABLE_NAME = 'ARTICLES_538'
NUMBER_OF_RECORDS = 499

now_str = time.strftime('%Y%m%d_%H%M%S')
filename_log = 'log/' + now_str + 'get_textrazor_json.txt'
utils.start_logging(filename_log)
logging.info('*' * 30 + '\nGET Text_Razor JSON - START\n' + '*' * 30)

# get data
# ----------------------------------------------------------------
conn = utils.create_connection('scraping538.db')
query = "SELECT * FROM ARTICLES_538 WHERE TEXTRAZOR_JSON_SAVED = 0 LIMIT {a}".format(a=NUMBER_OF_RECORDS)
articles_all = pd.read_sql(query, con=conn)
logging.info('Number of records (for which to get the JSON): {a}'.format(a=articles_all.shape[0]))

# text razor config
textrazor.api_key = config.TEXT_RAZOR_API_KEY
client = textrazor.TextRazor(extractors=[
    'entailments', 'relations', 'dependency-trees',
    "entities", "topics", "words", "phrases"
])
def db_preparation_main():
    # start logging
    log_file = 'stdout'  # auto-name the log file
    log_level = ''
    utils.start_logging(log_file=log_file, log_level='INFO', verbose=False)  # start logging

    # change directory to db/
    if not os.path.exists('db/'):
        os.mkdir('db/')
    os.chdir('db/')

    #####
    # make necessary bed files using annot_utils
    #####
    logger.info('making annot_utils files...')
    # make gencode gene coding bed file
    annot_utils.coding.make_coding_info("gencode_coding.bed.gz", "gencode", "hg19", False, True)
    annot_utils.gene.make_gene_info("gencode_gene.bed.gz", "gencode", "hg19", False, True)
    logger.info('finished making annot_utils files...')

    # get trinucleotide context for each bed line
    # hack to index the FASTA file
    fa = pysam.Fastafile('/home/w3varann/genomon_pipeline-2.4.0/database/GRCh37/GRCh37.fa')

    # gencode dictionary
    hin = gzip.open('/home/ysaito/bin/annot_utils-0.2.1/annot_utils/data/hg19/wgEncodeGencodeCompV19.txt.gz', 'r')
    ID_dict = {}
    for line in hin:
        F = line.rstrip('\n').split('\t')
        gene = str(F[12])
        genID = F[1]
        ID_dict[genID] = str(gene)
    hin.close()

    ######
    # Average Expression Data
    ######
    logger.info('reading in expression data...')
    expr_dict = {}
    with open("../input_db/CCLE_avg_exp.txt") as handle:
        # skip comment line(s)
        comment_line = handle.readline()
        header = handle.readline()
        for line in handle:
            F = line.rstrip('\n').split('\t')
            # consider genes with multiple transcripts
            if F[0] in expr_dict.keys():
                expr_dict[F[0]] = str(F[1]) + ":" + str(expr_dict[F[0]])
            else:
                expr_dict[F[0]] = F[1]
    handle.close()

    # get average expression for genes with multiple transcripts
    for k, v in expr_dict.items():
        if ":" in v:
            v_list = [float(n) for n in v.split(':')]
            expr_dict[k] = sum(v_list) / len(v_list)

    # get median expression of all genes
    all_median_expr = np.nanmedian([float(v) for v in expr_dict.values()])
    logger.info('finished reading in expression data')

    ######
    # Average Replication Time Data (for gene)
    ######
    logger.info('reading in replication_time data...')
    # skip comment line(s)
    with open("../input_db/replication_time.txt") as hIN:
        first_line = next(hIN)
        skip_rows = 1 if first_line.startswith('#') else 0
    hIN.close()
    # read in data frame
    rep_df = pd.read_table("../input_db/replication_time.txt", sep='\t', skiprows=skip_rows)
    # make dictionary
    rep_dict = rep_df.set_index('gene')['replication_time'].to_dict()

    ######
    # Average Replication Time Data (for position)
    ######
    # skip comment line(s)
    with open("../input_db/replication_time_position.txt") as hIN:
        first_line = next(hIN)
        skip_rows = 1 if first_line.startswith('#') else 0
    hIN.close()
    # read in data frame
    rep_pos_df = pd.read_table("../input_db/replication_time_position.txt", sep='\t', skiprows=skip_rows)
    rep_pos_df['chr_start'] = rep_pos_df['chr'].astype(str) + ':' + rep_pos_df['start'].astype(str)
    # make dictionary
    rep_pos_dict = rep_pos_df.set_index('chr_start')['replication_time'].to_dict()
    # get average replication time of all positions
    all_median_rep_time = np.nanmedian([float(v) for v in rep_pos_dict.values()])
    logger.info('finished reading in replication_time data')

    ######
    # Fill in missing replication time data and expression data
    ######
    logger.info('filling in missing replication time data and expression data...')
    with gzip.open('gencode_gene.bed.gz') as hIN:
        for line in hIN:
            F = line.rstrip('\n').split('\t')
            chrom = F[0].replace('chr', '')
            if chrom not in chroms:
                continue
            gene_start = int(F[1])
            gene_end = int(F[2])
            gene_mid_pos = (gene_start + gene_end) / 2
            gene = ID_dict[F[3]]
            if gene not in rep_dict.keys():
                gene_mid_pos_floor = int(math.floor(gene_mid_pos / 100000) * 100000 + 1)
                pos = chrom + ':' + str(gene_mid_pos_floor)
                if pos in rep_pos_dict.keys():
                    rep_dict[gene] = rep_pos_dict[chrom + ':' + str(gene_mid_pos_floor)]
                else:
                    rep_dict[gene] = all_median_rep_time
            if gene not in expr_dict.keys():
                expr_dict[gene] = all_median_expr
    hIN.close()
    logger.info('finished filling in missing replication time data and expression data')

    ######
    # get trinucleotide context
    ######
    logger.info('getting trinucleotide context...')
    gencode_df = pd.read_table("gencode_coding.bed.gz",
                               names=('chr', 'start', 'end', 'ID', 'type', 'strand'))
    # select only coding and intron records
    gencode_df = gencode_df[(gencode_df['type'] == 'coding') | (gencode_df['type'] == 'intron')]
    gencode_df['gene'] = gencode_df['ID'].map(ID_dict)
    # sort by ID
    gencode_df = gencode_df.sort_index(by=['ID', 'chr', 'start', 'end'])
    # remove chrY
    gencode_df = gencode_df[gencode_df['chr'] != 'chrY']
    gencode_df.to_csv("gencode_coding.modified.bed", sep='\t', header=False, index=False)

    pos_to_context = open("merged_pos_to_context.txt", 'w')
    with open("gencode_coding.modified.bed", 'r') as hin:
        for line in hin:
            F = line.rstrip('\n').split('\t')
            chrom = F[0].replace('chr', '')
            if chrom not in chroms:
                continue
            start = int(F[1])
            end = int(F[2])
            length = end - start
            ID = F[3]
            gene = F[6]
            if F[4] == "coding":
                for i in range(length):
                    strand = F[5]
                    pos = start + i
                    try:
                        # 0-based
                        nucs = fa.fetch(reference=chrom, start=pos - 1, end=pos + 2).upper()
                        if nucs[1] == 'G':
                            strand = strand_pairing[strand]
                            nucs = utils.rev_seq(nucs)
                        if nucs[1] == 'A':
                            strand = strand_pairing[strand]
                            nucs = utils.rev_seq(nucs)
                    except Exception as inst:
                        logger.debug("{0}: {1}".format(type(inst), inst.args))
                    if 'N' not in nucs:
                        print >> pos_to_context, '\t'.join([
                            gene, chrom + ':' + str(pos), strand, nucs,
                            str(expr_dict[gene]), str(rep_dict[gene])])
            if F[4] == "intron":
                for pos in (start, start + 1, end - 2, end - 1):
                    strand = F[5]
                    try:
                        # 0-based
                        nucs = fa.fetch(reference=chrom, start=pos - 1, end=pos + 2).upper()
                        if nucs[1] == 'G':
                            strand = strand_pairing[strand]
                            nucs = utils.rev_seq(nucs)
                        if nucs[1] == 'A':
                            strand = strand_pairing[strand]
                            nucs = utils.rev_seq(nucs)
                    except Exception as inst:
                        logger.debug("{0}: {1}".format(type(inst), inst.args))
                    if 'N' not in nucs:
                        print >> pos_to_context, '\t'.join([
                            gene, chrom + ':' + str(pos), strand, nucs,
                            str(expr_dict[gene]), str(rep_dict[gene])])
    hin.close()
    pos_to_context.close()
    logger.info('finished getting trinucleotide context')

    # sort
    hout = open('merged_pos_to_context_sorted.txt', 'w')
    s_ret = subprocess.call(["sort", "-k2", "merged_pos_to_context.txt"], stdout=hout)
    hout.close()

    # sometimes, multiple genes share the same genomic region as coding regions, e.g.
    #   chr1 367658 368597 OR4F16(NM_001005277) coding +
    #   chr1 367658 368597 OR4F29(NM_001005221) coding +
    # in these cases, we use the higher expression.
    # concern: is there any genomic position which different genes on different strands share?
    # --> we checked the possibility and there was no position like that.
    rep_pos_df = pd.read_table("merged_pos_to_context_sorted.txt", sep='\t',
                               names=('gene', 'pos', 'strand', 'trinucleotide',
                                      'expression', 'replication_time'))
    del rep_pos_df['gene']
    rep_pos_df = rep_pos_df.drop_duplicates()
    rep_pos_df.to_csv("merged_pos_to_context_duplicate_dropped.txt", sep='\t',
                      header=False, index=False)

    # top expression
    tmp_pos = ""
    tmp_exp = 0
    hout = open("merged_pos_to_context_duplicate_dropped2.txt", "w")
    with open("merged_pos_to_context_duplicate_dropped.txt", 'r') as hin:
        for line in hin:
            F = line.rstrip('\n').split('\t')
            if tmp_pos != F[0] and tmp_pos != "":
                print >> hout, '\t'.join([tmp_pos, tmp_strand, tmp_trinuc, tmp_exp, tmp_rep_time])
                tmp_pos = F[0]
                tmp_strand = F[1]
                tmp_trinuc = F[2]
                tmp_exp = F[3]
                tmp_rep_time = F[4]
                continue
            tmp_pos = F[0]
            tmp_strand = F[1]
            tmp_trinuc = F[2]
            if F[3] > tmp_exp:
                tmp_exp = F[3]
                tmp_rep_time = F[4]
        print >> hout, '\t'.join([tmp_pos, tmp_strand, tmp_trinuc, tmp_exp, tmp_rep_time])
    hin.close()
    hout.close()

    rep_pos_df = pd.read_table("merged_pos_to_context_duplicate_dropped2.txt", sep='\t',
                               names=('pos', 'strand', 'trinucleotide',
                                      'expression', 'replication_time'))
    rep_pos_df['replication_time'] = rep_pos_df['replication_time'].fillna(all_median_rep_time)
    rep_pos_df['exp_group'], exp_bins = pd.qcut(
        rep_pos_df['expression'].astype(float), 15,
        labels=['exp_class1', 'exp_class2', 'exp_class3', 'exp_class4', 'exp_class5',
                'exp_class6', 'exp_class7', 'exp_class8', 'exp_class9', 'exp_class10',
                'exp_class11', 'exp_class12', 'exp_class13', 'exp_class14', 'exp_class15'],
        retbins=True)
    rep_pos_df['rep_group'], rep_bins = pd.qcut(
        rep_pos_df['replication_time'].astype(float), 15,
        labels=['rep_class1', 'rep_class2', 'rep_class3', 'rep_class4', 'rep_class5',
                'rep_class6', 'rep_class7', 'rep_class8', 'rep_class9', 'rep_class10',
                'rep_class11', 'rep_class12', 'rep_class13', 'rep_class14', 'rep_class15'],
        retbins=True)
    rep_pos_df['class'] = rep_pos_df['exp_group'].str.cat(rep_pos_df['rep_group'], sep='_')
    rep_pos_df.to_csv("merged_pos_to_context_class.txt", sep='\t', header=False, index=False)

    print "exp bins"
    print exp_bins
    print "rep bins"
    print rep_bins

    ######
    # output
    ######
    f = open('CCLE_avg_exp_output.txt', 'w')
    for k, v in sorted(expr_dict.items()):
        f.write('{0}\t{1}\n'.format(k, v))
    f.close()

    f2 = open('replication_time_output.txt', 'w')
    for k, v in sorted(rep_dict.items()):
        if v == 'NaN':
            rep_dict[k] = all_median_rep_time
        f2.write('{0}\t{1}\n'.format(k, v))
    f2.close()
def run(opts):
    global logger
    # Define logger and start logging
    #logname = os.path.basename(opts.output) + '.log'
    logname = os.path.splitext(os.path.basename(__file__))[0] + '.log'
    logger = utils.start_logging(filename=logname, level=opts.log)
    localtime = utils.getCurrentTimeAsAscii()
    t0 = time.time()
    msg = os.path.basename(__file__) + ' script starts at %s.' % localtime
    print msg
    logger.info(msg)

    if not opts.output.endswith(".conf.xml"):
        logger.error('The output file must end with .conf.xml')
        raise IOError('The output file must end with .conf.xml')

    # Create python postgres connection
    global cursor
    connection, cursor = utils.connectToDB(opts.dbname, opts.dbuser, opts.dbpass,
                                           opts.dbhost, opts.dbport)

    # Check that provided background is in DB
    query = """
SELECT OSG_DATA_ITEM_PC_BACKGROUND.abs_path, srid
FROM OSG_DATA_ITEM_PC_BACKGROUND JOIN RAW_DATA_ITEM USING (raw_data_item_id)"""
    rows, num_rows = utils.fetchDataFromDB(cursor, query)
    backGroundAbsPath = None
    backgroundSRID = None
    for (bgAbsPath, bgSRID) in rows:
        if opts.background == os.path.basename(bgAbsPath):
            backGroundAbsPath = bgAbsPath
            backgroundSRID = bgSRID
    if backGroundAbsPath == None:
        errorMsg = 'Background ' + opts.background + ' is not found'
        logger.error(errorMsg)
        raise Exception(errorMsg)

    # Get the root object: the OSG configuration
    rootObject = viewer_conf_api.osgRCconfiguration()
    # set version
    rootObject.set_version("0.2")

    # Add all the different XML of the active objects
    # (we add distinct since the boundings will share XMLs)
    query = """
SELECT DISTINCT xml_abs_path FROM OSG_DATA_ITEM ORDER BY xml_abs_path"""
    rows, num_rows = utils.fetchDataFromDB(cursor, query)
    for (xmlPath,) in rows:
        if xmlPath.count(opts.osg) == 0:
            logger.error('Mismatch between given OSG data directory ' +
                         'and DB content')
        rootObject.add_objectLibrary(viewer_conf_api.objectLibrary(
            url=os.path.relpath(xmlPath, opts.osg)))

    # Add the object library with the boundings
    rootObject.add_objectLibrary(viewer_conf_api.objectLibrary(
        url=utils.BOUNDINGS_XML_RELATIVE))

    # Add the cameras that are in the DB
    cameras = viewer_conf_api.cameras()
    query = """
SELECT osg_camera_name, srid, x, y, z, h, p, r
FROM OSG_CAMERA JOIN OSG_LOCATION USING (osg_location_id)"""
    rows, num_rows = utils.fetchDataFromDB(cursor, query)
    for (name, srid, x, y, z, h, p, r) in rows:
        if (srid is not None) and (srid == backgroundSRID):
            x, y, z = getOSGPosition(x, y, z, srid)
        else:
            x, y, z = getOSGPosition(x, y, z)
        cameras.add_camera(viewer_conf_api.camera(
            name=name, x=x, y=y, z=z, h=h, p=p, r=r))

    # Add Default cameras for the items that have no camera in the DB
    query = """
SELECT item_id, ST_SRID(geom),
       st_x(st_centroid(geom)), st_y(st_centroid(geom)),
       min_z + ((max_z - min_z) / 2)
FROM ITEM
WHERE NOT background AND geom IS NOT null AND item_id NOT IN (
      SELECT DISTINCT item_id FROM OSG_ITEM_CAMERA)
ORDER BY item_id"""
    rows, numitems = utils.fetchDataFromDB(cursor, query)
    for (itemId, srid, x, y, z) in rows:
        # only call getOSGPosition if [x,y,z] are not None
        # should item_id = -1 be added?
        if all(position is not None for position in [x, y, z]) and itemId > 0:
            if (srid is not None) and (srid == backgroundSRID):
                x, y, z = getOSGPosition(x, y, z, srid)
            else:
                x, y, z = getOSGPosition(x, y, z)
            cameras.add_camera(viewer_conf_api.camera(
                name=utils.DEFAULT_CAMERA_PREFIX + str(itemId), x=x, y=y, z=z))
    rootObject.set_cameras(cameras)

    # Add the XML content of the preferences
    rootObject.set_preferences(viewer_conf_api.parseString(DEFAULT_PREFENCES))

    attributes = viewer_conf_api.attributes()
    # Use generic method to fill all properties.
    # We need the name in the XML, the column name in the DB and
    # the table name in the DB
    for property in utils.ATTRIBUTES_ORDER:
        (cName, tName) = utils.ATTRIBUTES[property]
        elements = getattr(viewer_conf_api, property + 's')()
        # We need to call the columns and tables with extra "" because
        # they were created from the Access DB
        # utils.dbExecute(cursor, 'SELECT "' + cName + '" FROM "' + tName + '"')
        utils.dbExecute(cursor, 'SELECT ' + cName + ' FROM ' + tName)
        for (element,) in cursor:
            getattr(elements, 'add_' + property)(getattr(
                viewer_conf_api, property)(name=element))
        getattr(attributes, 'set_' + property + 's')(elements)
    rootObject.set_attributes(attributes)

    # Add all the static objects, i.e. the OSG from the background
    # Add the static object for the background
    staticObjects = viewer_conf_api.staticObjects()
    staticObjects.add_staticObject(viewer_conf_api.staticObject(
        url=os.path.relpath(
            glob.glob(backGroundAbsPath + '/' + utils.OSG_DATA_PREFIX + '.osgb')[0],
            opts.osg)))
    # Add hardcoded DOME
    staticObjects.add_staticObject(viewer_conf_api.staticObject(
        url=utils.DOMES_OSG_RELATIVE))
    rootObject.set_staticObjects(staticObjects)

    # Add the 5 different layers of active objects
    activeObjects = viewer_conf_api.activeObjects()
    # First we add points, meshes and pictures which are related to
    # the active_objects_sites
    layersData = [('points', 'OSG_DATA_ITEM_PC_SITE', utils.AO_TYPE_PC),
                  ('photos', 'OSG_DATA_ITEM_PICTURE', utils.AO_TYPE_PIC),
                  ('meshes', 'OSG_DATA_ITEM_MESH', utils.AO_TYPE_MESH)]
    for (layerName, tableName, inType) in layersData:
        layer = viewer_conf_api.layer(name=layerName)
        query = """
SELECT item_id, raw_data_item_id, OSG_LOCATION.srid,
       x, y, z, xs, ys, zs, h, p, r, cast_shadow
FROM """ + tableName + """ JOIN OSG_DATA_ITEM USING (osg_data_item_id)
     JOIN OSG_LOCATION USING (osg_location_id)
     JOIN RAW_DATA_ITEM USING (raw_data_item_id)
ORDER BY item_id"""
        rows, numitems = utils.fetchDataFromDB(cursor, query)
        for (itemId, rawDataItemId, srid, x, y, z, xs, ys, zs, h, p, r,
             castShadow) in rows:
            # only call getOSGPosition if [x,y,z] are not None
            if all(position is not None for position in [x, y, z]):
                if (srid is not None) and (srid == backgroundSRID):
                    x, y, z = getOSGPosition(x, y, z, srid)
                else:
                    x, y, z = getOSGPosition(x, y, z)
            uniqueName = utils.codeOSGActiveObjectUniqueName(cursor, inType,
                                                             rawDataItemId)
            activeObject = viewer_conf_api.activeObject(prototype=uniqueName,
                                                        uniqueName=uniqueName)
            setting = viewer_conf_api.setting(
                x=x, y=y, z=z, xs=xs, ys=ys, zs=zs, h=h, p=p, r=r,
                castShadow=(1 if castShadow else 0))
            activeObject.set_setting(setting)
            layer.add_activeObject(activeObject)
        activeObjects.add_layer(layer)

    # Add the boundings
    layer = viewer_conf_api.layer(name='boundings')
    # We first add the boundings that are currently in the DB
    query = """
SELECT item_id, object_number, x, y, z, xs, ys, zs, h, p, r,
       OSG_LOCATION.cast_shadow, srid
FROM OSG_ITEM_OBJECT JOIN OSG_LOCATION USING (osg_location_id)
ORDER BY item_id, object_number"""
    osgItemObjects, numOsgItemObjects = utils.fetchDataFromDB(cursor, query)
    # osgItemObjects is (itemId, objectNumber, x, y, z, xs, ys, zs, h, p, r,
    # castShadow, srid)
    # Now we add Default OSG data items for the objects that are not in the
    # OSG_ITEM_OBJECT table
    query = """
SELECT item_id, object_number
FROM item_object
WHERE (item_id, object_number) NOT IN (SELECT item_id, object_number FROM OSG_ITEM_OBJECT)
ORDER BY item_id, object_number"""
    objects, num_objects = utils.fetchDataFromDB(cursor, query)
    for (itemId, objectNumber) in objects:
        srid = None
        (x, y, z) = (0, 0, 0)
        (xs, ys, zs) = (1, 1, 1)
        query = """
SELECT ST_SRID(geom), st_x(st_centroid(geom)), st_y(st_centroid(geom)),
       min_z + ((max_z - min_z) / 2),
       st_xmax(geom)-st_xmin(geom) as dx,
       st_ymax(geom)-st_ymin(geom) as dy,
       (max_z - min_z) as dz
FROM ITEM
WHERE item_id = %s and geom is not %s"""
        queryArgs = [itemId, None]
        footprints, num_footprints = utils.fetchDataFromDB(cursor, query,
                                                           queryArgs)
        if num_footprints:
            (srid, x, y, z, xs, ys, zs) = footprints[0]
            if xs == 0:
                xs = 1
            if ys == 0:
                ys = 1
            if zs == 0:
                zs = 1
        osgItemObjects.append([itemId, objectNumber, x, y, z, xs, ys, zs,
                               0, 0, 0, False, srid])
    # Now let's add them to the XML
    for (itemId, objectNumber, x, y, z, xs, ys, zs, h, p, r, castShadow,
         srid) in osgItemObjects:
        # only call getOSGPosition if [x,y,z] are not None
        if all(position is not None for position in [x, y, z]) and itemId > 0:
            if (srid is not None) and (srid == backgroundSRID):
                x, y, z = getOSGPosition(x, y, z, srid)
            else:
                x, y, z = getOSGPosition(x, y, z)
            uniqueName = utils.codeOSGActiveObjectUniqueName(
                cursor, utils.AO_TYPE_OBJ, itemId=itemId, objectId=objectNumber)
            proto = "Bounding Box"
            activeObject = viewer_conf_api.activeObject(prototype=proto,
                                                        uniqueName=uniqueName)
            setting = viewer_conf_api.setting(
                x=x, y=y, z=z, xs=xs, ys=ys, zs=zs, h=h, p=p, r=r,
                castShadow=(1 if castShadow else 0))
            activeObject.set_setting(setting)
            layer.add_activeObject(activeObject)
    activeObjects.add_layer(layer)

    # Add the labels
    layer = viewer_conf_api.layer(name='labels')
    utils.dbExecute(cursor, 'SELECT osg_label_name, text, red, green, blue, ' +
                    'rotate_screen, outline, font, srid, x, y, z, xs, ys, zs, h, ' +
                    'p, r, cast_shadow FROM OSG_LABEL INNER JOIN ' +
                    'OSG_LOCATION ON OSG_LABEL.osg_location_id=' +
                    'OSG_LOCATION.osg_location_id')
    rows = cursor.fetchall()
    for (name, text, red, green, blue, rotatescreen, outline, font, srid,
         x, y, z, xs, ys, zs, h, p, r, castShadow) in rows:
        proto = "labelPrototype"
        uniqueName = utils.codeOSGActiveObjectUniqueName(
            cursor, utils.AO_TYPE_LAB, labelName=name)
        if (srid is not None) and (srid == backgroundSRID):
            x, y, z = getOSGPosition(x, y, z, srid)
        else:
            x, y, z = getOSGPosition(x, y, z)
        activeObject = viewer_conf_api.activeObject(
            prototype=proto, uniqueName=uniqueName, labelText=text,
            labelColorRed=red, labelColorGreen=green, labelColorBlue=blue,
            labelRotateScreen=rotatescreen, outline=outline, Font=font)
        setting = viewer_conf_api.setting(
            x=x, y=y, z=z, xs=xs, ys=ys, zs=zs, h=h, p=p, r=r,
            castShadow=(1 if castShadow else 0))
        activeObject.set_setting(setting)
        layer.add_activeObject(activeObject)
    activeObjects.add_layer(layer)
    rootObject.set_activeObjects(activeObjects)

    # Create the XML
    rootObject.export(open(opts.output, 'w'), 0)

    elapsed_time = time.time() - t0
    msg = 'Finished. Total elapsed time: %.02f seconds. See %s' % (elapsed_time, logname)
    print(msg)
    logger.info(msg)
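# getOSGPosition is called above but not defined in this snippet. The sketch
# below is an assumption of what such a helper could look like: it shifts
# absolute coordinates into background-relative OSG coordinates when a SRID is
# given. The offsets parameter and the subtraction are guesses for
# illustration, not the original implementation.
def getOSGPosition(x, y, z, srid=None, offsets=(0.0, 0.0, 0.0)):
    """Hypothetical sketch: map absolute coordinates to OSG coordinates."""
    if srid is not None:
        # assumed behaviour: subtract the offsets of the matched background
        ox, oy, oz = offsets
        return (x - ox, y - oy, z - oz)
    # assumed behaviour: coordinates are already relative, return unchanged
    return (x, y, z)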
def parse_arguments():
    # make a parser
    info = 'Evaluates cancer driver gene methods'
    parent_parser = argparse.ArgumentParser(description=info)

    # logging arguments
    parent_parser.add_argument('-ll', '--log-level',
                               type=str, action='store', default='',
                               help='Write a log file (--log-level=DEBUG for debug mode, '
                                    '--log-level=INFO for info mode)')
    parent_parser.add_argument('-l', '--log',
                               type=str, action='store', default='stdout',
                               help='Path to log file. (accepts "stdout")')
    parent_parser.add_argument('-v', '--verbose',
                               action='store_true', default=False,
                               help='Flag for more verbose log output')

    # add subparsers
    subparsers = parent_parser.add_subparsers(title='Sub-commands', dest='kind')
    parser_pipeline = subparsers.add_parser('pipeline',
                                            help='Run all sub-commands evaluating methods',
                                            description='Run all sub-commands evaluating methods')
    parser_split = subparsers.add_parser('split_mutations',
                                         help='Splits mutations in a MAF-like format into two random halves',
                                         description='Splits mutations in a MAF-like format into two random halves. '
                                                     'Each split maintains the proportion of samples in each cancer type.')
    parser_cgc = subparsers.add_parser('list_overlap',
                                       help='Evaluate the overlap of significant genes with the Cancer Gene Census (CGC)',
                                       description='Evaluate the overlap of significant genes with the Cancer Gene Census (CGC)')
    parser_ovlp = subparsers.add_parser('method_overlap',
                                        help='Counts the number of methods that find specific genes significant',
                                        description='Counts the number of methods that find specific genes significant')
    parser_pval = subparsers.add_parser('pvalue',
                                        help='Examine the p-value distribution',
                                        description='Examine the p-value distribution')
    parser_signif = subparsers.add_parser('num_signif',
                                          help='Examine the number of significant genes',
                                          description='Examine the number of significant genes')
    help_info = 'Evaluate method consistency'
    parser_consis = subparsers.add_parser('consistency',
                                          help=help_info,
                                          description='Evaluate method consistency')

    # program arguments
    for i, parser in enumerate([parser_pipeline, parser_split, parser_cgc,
                                parser_ovlp, parser_pval, parser_signif,
                                parser_consis]):
        # group of parameters
        major_parser = parser.add_argument_group(title='Major options')
        advance_parser = parser.add_argument_group(title='Advanced options')

        if i != 1 and i != 6:
            help_str = 'directory containing results from methods on full data'
            major_parser.add_argument('-i', '--input-dir',
                                      type=str, default=None, help=help_str)
        elif i == 1:
            help_str = 'Mutation file to split'
            major_parser.add_argument('-m', '--mutations',
                                      type=str, required=True, help=help_str)
        help_str = 'Configuration file (YAML format)'
        major_parser.add_argument('-config', '--config',
                                  type=str, default=None, help=help_str)
        help_str = 'output directory'
        major_parser.add_argument('-o', '--output',
                                  type=str, default=None, help=help_str)

        if i == 0:
            list_parser = major_parser.add_mutually_exclusive_group(required=True)
            help_str = 'Path to Cancer Gene Census file'
            list_parser.add_argument('-c', '--cgc',
                                     type=str, default=None, help=help_str)
            help_str = 'Custom driver gene list'
            list_parser.add_argument('-g', '--gene-list',
                                     type=str, default=None, help=help_str)
            help_str = ("Minimum number of methods finding a gene significant to "
                        "not include that gene's p-value (Default: 3)")
            major_parser.add_argument('-m', '--min',
                                      type=int, default=3, help=help_str)
            help_str = 'Directory containing the consistency results'
            major_parser.add_argument('-consis-dir', '--consistency-dir',
                                      type=str, required=True, help=help_str)
            help_str = 'Q-value threshold for significance (Default: 0.1)'
            advance_parser.add_argument('-q', '--qvalue',
                                        type=float, default=.1, help=help_str)
            help_str = 'Ranking depth to consider for consistency (Default: 100)'
            advance_parser.add_argument('-d', '--depth',
                                        type=int, default=100, help=help_str)
            help_str = 'Generate plots examining evaluation (Default: False)'
            advance_parser.add_argument('-p', '--plot',
                                        action='store_true', default=False, help=help_str)
        if i == 1:
            help_str = ('Column name containing sample IDs (Default: checks '
                        '"Tumor_Sample_Barcode" or "Tumor_Sample")')
            advance_parser.add_argument('-s', '--sample-col',
                                        type=str, help=help_str)
            help_str = 'Number of iterations to randomly split data (Default: 10)'
            advance_parser.add_argument('-n', '--number',
                                        type=int, default=10, help=help_str)
        elif i == 2:
            list_parser = major_parser.add_mutually_exclusive_group(required=True)
            help_str = 'Path to Cancer Gene Census file'
            list_parser.add_argument('-c', '--cgc',
                                     type=str, default=None, help=help_str)
            help_str = 'Custom driver gene list'
            list_parser.add_argument('-g', '--gene-list',
                                     type=str, default=None, help=help_str)
            help_str = 'Q-value threshold for significance (Default: 0.1)'
            advance_parser.add_argument('-q', '--qvalue',
                                        type=float, default=.1, help=help_str)
            help_str = 'Generate plots examining evaluation (Default: False)'
            advance_parser.add_argument('-p', '--plot',
                                        action='store_true', default=False, help=help_str)
        elif i == 3:
            help_str = 'Q-value threshold for significance (Default: 0.1)'
            advance_parser.add_argument('-q', '--qvalue',
                                        type=float, default=.1, help=help_str)
            help_str = 'Generate plots examining evaluation (Default: False)'
            advance_parser.add_argument('-p', '--plot',
                                        action='store_true', default=False, help=help_str)
        elif i == 4:
            help_str = ("Minimum number of methods finding a gene significant to "
                        "not include that gene's p-value (Default: 3)")
            major_parser.add_argument('-m', '--min',
                                      type=int, default=3, help=help_str)
            help_str = ('Path to Cancer Gene Census file (optional). Additionally '
                        'removes genes from the CGC.')
            major_parser.add_argument('-c', '--cgc',
                                      type=str, default=None, help=help_str)
            help_str = 'Generate plots examining evaluation (Default: False)'
            advance_parser.add_argument('-p', '--plot',
                                        action='store_true', default=False, help=help_str)
        elif i == 5:
            help_str = 'Q-value threshold for significance (Default: 0.1)'
            advance_parser.add_argument('-q', '--qvalue',
                                        type=float, default=.1, help=help_str)
            help_str = 'Generate plots examining evaluation (Default: False)'
            advance_parser.add_argument('-p', '--plot',
                                        action='store_true', default=False, help=help_str)
        elif i == 6:
            help_str = 'Directory containing the consistency results'
            major_parser.add_argument('-consis-dir', '--consistency-dir',
                                      type=str, required=True, help=help_str)
            help_str = 'Ranking depth to consider for consistency (Default: 100)'
            advance_parser.add_argument('-d', '--depth',
                                        type=int, default=100, help=help_str)
            help_str = 'Generate plots examining evaluation (Default: False)'
            advance_parser.add_argument('-p', '--plot',
                                        action='store_true', default=False, help=help_str)

    args = parent_parser.parse_args()

    # handle logging
    if args.log_level or args.log:
        if args.log:
            log_file = args.log
        else:
            log_file = ''  # auto-name the log file
    else:
        log_file = os.devnull
    log_level = args.log_level
    utils.start_logging(log_file=log_file,
                        log_level=log_level,
                        verbose=args.verbose)  # start logging

    opts = vars(args)

    # log user entered command
    #logger.info('Version: {0}'.format(prob2020.__version__))
    logger.info('Command: {0}'.format(' '.join(sys.argv)))
    return opts
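# Hypothetical example invocations of the parser above (script and file names
# are made up for illustration; the parent-level logging flags must precede
# the sub-command):
#
#   python evaluate.py --log-level INFO list_overlap \
#       -i method_results/ -c cancer_gene_census.tsv -o evaluation/
#
#   python evaluate.py split_mutations -m mutations.maf -o splits/ -n 10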
""" Functions for training estimators, performing cross validation, and making predictions """ __author__ = 'Bryan Gregory' __email__ = '*****@*****.**' __date__ = '11-19-2013' #Internal modules import utils #Start logger to record all info, warnings, and errors to Logs/logfile.log log = utils.start_logging(__name__) import ml_metrics #External modules import time from datetime import datetime from sklearn import (metrics, cross_validation, linear_model, preprocessing) from sklearn.externals import joblib import numpy as np from scipy import sparse from scipy.sparse import coo_matrix, hstack, vstack #-----Run Cross Validation Steps-----# def cross_validate(model, settings, dfTrn_Segment, dfTest_Segment): #Combine the train and test feature matrices and create targets mtxTrn, mtxTest, mtxTrnTarget, mtxTestTarget = combine_features(model, dfTrn_Segment, dfTest_Segment) #Run CV if settings['cv_method'] in ['march','april','list_split']: cv_preds = cross_validate_temporal(mtxTrn,mtxTest,mtxTrnTarget.ravel(),mtxTestTarget.ravel(),model) if settings['cv_method'] in ['kfold']: cv_preds = cross_validate_kfold(mtxTrn,mtxTest,mtxTrnTarget.ravel(),mtxTestTarget.ravel(),model)
def run(args):
    # start logging
    logname = os.path.splitext(os.path.basename(__file__))[0] + '.log'
    utils.start_logging(filename=logname, level=utils.DEFAULT_LOG_LEVEL)
    localtime = utils.getCurrentTimeAsAscii()
    t0 = time.time()
    msg = os.path.basename(__file__) + ' script starts at %s.' % localtime
    print msg
    logging.info(msg)

    # connect to the DB
    connection, cursor = utils.connectToDB(args.dbname, args.dbuser,
                                           args.dbpass, args.dbhost,
                                           args.dbport)
    itemIds = []
    if args.itemid == '':
        data, num = utils.fetchDataFromDB(
            cursor, 'SELECT item_id FROM ITEM WHERE NOT background')
        for (itemId, ) in data:
            itemIds.append(itemId)
    else:
        itemIds = args.itemid.split(',')

    # close the connection to the DB
    utils.closeConnectionDB(connection, cursor)

    # Create queues
    itemsQueue = multiprocessing.Queue()    # The queue of tasks (queries)
    resultsQueue = multiprocessing.Queue()  # The queue of results

    for itemId in itemIds:
        itemsQueue.put(int(itemId))
    for i in range(args.cores):
        # we add as many None jobs as numUsers to tell them to terminate (queue is FIFO)
        itemsQueue.put(None)

    procs = []
    # We start numUsers users processes
    for i in range(args.cores):
        procs.append(multiprocessing.Process(
            target=runChild,
            args=(i, itemsQueue, resultsQueue, args.las, args.dbname,
                  args.dbuser, args.dbpass, args.dbhost, args.dbport)))
        procs[-1].start()

    for i in range(len(itemIds)):
        [procIndex, itemId] = resultsQueue.get()

    # wait for all users to finish their execution
    for i in range(args.cores):
        procs[i].join()

    # measure elapsed time
    elapsed_time = time.time() - t0
    msg = 'Finished. Total elapsed time: %.02f seconds. See %s' % (
        elapsed_time, logname)
    print(msg)
    logging.info(msg)
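# runChild is referenced above but not shown. A minimal sketch of such a
# worker, assuming it drains the task queue until the None sentinel and
# reports each finished item on the results queue; the per-item processing is
# left as a placeholder and is not the original implementation:
def runChild(childId, itemsQueue, resultsQueue, lasPath,
             dbname, dbuser, dbpass, dbhost, dbport):
    connection, cursor = utils.connectToDB(dbname, dbuser, dbpass, dbhost, dbport)
    while True:
        itemId = itemsQueue.get()
        if itemId is None:
            break  # sentinel reached: no more work for this worker
        # ... process the item here (e.g. query the DB, read LAS data) ...
        resultsQueue.put([childId, itemId])  # matches the resultsQueue.get() above
    utils.closeConnectionDB(connection, cursor)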
def run(args):
    global logger
    global connection
    global cursor

    # start logging
    logname = os.path.basename(args.input) + '.log'
    logger = utils.start_logging(filename=logname, level=utils.DEFAULT_LOG_LEVEL)
    localtime = utils.getCurrentTimeAsAscii()
    t0 = time.time()
    msg = os.path.basename(__file__) + ' script starts at %s.' % localtime
    print msg
    logger.info(msg)

    if not os.path.isfile(args.input):
        msg = "Input file is not found!"
        print msg
        logger.error(msg)
        return

    # connect to the DB
    connection, cursor = utils.connectToDB(args.dbname, args.dbuser,
                                           args.dbpass, args.dbhost,
                                           args.dbport)

    if args.input.endswith('shp'):
        prjFile = args.input.replace('shp', 'prj')
        if not os.path.isfile(prjFile):
            msg = "Input PRJ file is not found!"
            print msg
            logger.error(msg)
            return
        sqlFile = os.path.basename(args.input).replace('shp', 'sql')
        shp2psql = 'shp2pgsql -s ' + str(getEPSG(prjFile)) + ' -c ' + \
                   args.input + ' sites_geoms_temp > ' + sqlFile
        print shp2psql
        logger.info(shp2psql)
        os.system(shp2psql)
    else:
        sqlFile = args.input
        for line in open(sqlFile, 'r').read().split('\n'):
            if line.count('CREATE TABLE'):
                if line.count('sites_geoms_temp') == 0:
                    msg = "The table in the SQL file must be named sites_geoms_temp. Rename the table to sites_geoms_temp!"
                    print msg
                    logger.error(msg)
                    return

    # clean the temp table if it existed
    clean_temp_table(args)

    # load the table sites_geoms_temp from the SQL file into the DB
    success_loading = utils.load_sql_file(cursor, sqlFile)
    if args.input.endswith('shp'):
        os.system('rm -rf ' + sqlFile)

    if success_loading:
        # check if the SITES table is empty, then change the type of the geom field
        update_geom_col_type(cursor)

        # find the list of new IDs and the list of overlapping IDs
        no_item_well_temp_ids, both_in_item_and_temp_ids = find_lists(cursor)

        # insert the object geometries per site for the sites not in ITEM,
        # but in the sites_geoms_temp table
        update_geometries(no_item_well_temp_ids, True)

        # update the union of object geometries per site for the sites both
        # in ITEM and in the sites_geoms_temp table
        update_geometries(both_in_item_and_temp_ids, False)

    # clean the temp table
    clean_temp_table(args)

    # close the connection to the DB
    utils.closeConnectionDB(connection, cursor)

    # measure elapsed time
    elapsed_time = time.time() - t0
    msg = 'Finished. Total elapsed time: %.02f seconds. See %s' % (
        elapsed_time, logname)
    print(msg)
    logger.info(msg)

    return
from __future__ import division

import Microbial_com_modeling as mcm
from Microbial_com_modeling import IntegrationError
import multiprocess
import numpy as np
import statsmodels
import multiprocessing as mp
import time
import logging
from utils import start_logging, save_json
import json
import os
import matplotlib.pyplot as plt
import seaborn as sns

logger = start_logging(log_file=True)
old_settings = np.seterr(all='raise')
start_time = time.time()


def draw_plot(directory, fname):
    sns.set_context("talk")
    with open("{}/{}.json".format(directory, fname)) as json_file:
        data = json.load(json_file)
    sensitivity = np.array(data["sensitivity"])
    specificity = np.array(data["specificity"])
    value_range = np.array(data["value_range"])
def run(args):
    global logger
    global offsetX
    global offsetY
    global offsetZ

    logname = os.path.basename(args.output) + '.log'
    logger = utils.start_logging(filename=logname, level=args.log)  # start logging
    localtime = utils.getCurrentTimeAsAscii()
    msg = __file__ + ' script logging start at %s' % localtime
    print msg
    logger.info(msg)
    t0 = time.time()

    # connect to DB and get a cursor
    connection, cursor = utils.connectToDB(args.dbname, args.dbuser,
                                           args.dbpass, args.dbhost)

    # We assume the osg location is relative
    # We need to make it absolute by adding the offset of the background with srid as provided
    query = """
SELECT C.offset_x, C.offset_y, C.offset_z
FROM raw_data_item A, raw_data_item_pc B, osg_data_item_pc_background C
WHERE A.raw_data_item_id = B.raw_data_item_id AND
      B.raw_data_item_id = C.raw_data_item_id AND
      A.srid = %s"""
    queryArgs = [args.srid, ]
    backgroundOffsets, num_backgrounds = utils.fetchDataFromDB(cursor, query,
                                                               queryArgs)
    if num_backgrounds:
        (offsetX, offsetY, offsetZ) = backgroundOffsets[0]

    # get all items
    query = 'SELECT item_id, ST_ASGEOJSON(geom), min_z, max_z FROM item WHERE NOT background ORDER BY item_id'
    sites, num_sites = utils.fetchDataFromDB(cursor, query)

    data = []
    for (itemId, itemGeom, minz, maxz) in sites:
        # Generate the JSON data for this item
        dataSite = {}
        dataSite["id"] = itemId
        if itemGeom != None:
            dataSite["footprint"] = json.loads(itemGeom)['coordinates']
            dataSite["footprint_altitude"] = [minz, maxz]
        addThumbnail(cursor, itemId, dataSite)
        addSiteMetaData(cursor, itemId, dataSite)
        addPointCloud(cursor, itemId, dataSite, args.srid)
        addMeshes(cursor, itemId, dataSite, args.srid)
        addObjectsMetaData(cursor, itemId, dataSite, args.srid)
        data.append(dataSite)

    # close the DB connection
    utils.closeConnectionDB(connection, cursor)

    # save the data into JSON file
    save2JSON(args.output, data)

    elapsed_time = time.time() - t0
    msg = 'Finished. Total elapsed time: %.02f seconds. See %s' % (elapsed_time, logname)
    print(msg)
    logger.info(msg)
from __future__ import unicode_literals

import sys
import os

import tweepy

from utils import (
    start_logging,
    upload_media,
)

import logging
log = logging.getLogger(__name__)

start_logging()

auth = tweepy.OAuthHandler(os.environ['TWITTER_CONSUMER_KEY'],
                           os.environ['TWITTER_CONSUMER_SECRET'])
auth.set_access_token(os.environ['TWITTER_ACCESS_TOKEN'],
                      os.environ['TWITTER_ACCESS_TOKEN_SECRET'])
api = tweepy.API(auth)

media_id = upload_media(api, sys.argv[1])
log.info('media_id = %s', media_id)
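# upload_media is imported from utils but not shown here. A plausible minimal
# version, assuming it simply wraps tweepy's media_upload endpoint and returns
# the resulting media_id; this is a sketch, not the original helper:
def upload_media(api, filename):
    """Hypothetical sketch: upload an image and return its media_id."""
    media = api.media_upload(filename)  # tweepy.API.media_upload returns a Media object
    log.info('uploaded %s', filename)
    return media.media_id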
import logging
import utils
import time
import sqlite3
import pandas as pd

now_str = time.strftime('%Y%m%d_%H%M%S')
FILENAME_LOG = 'log/' + now_str + 'report_integration.txt'
utils.start_logging(FILENAME_LOG)
logging.info('*' * 30 + '\nINTEGRATION START\n' + '*' * 30)

# INPUTS
TABLE_NAME = 'ARTICLES_538'
STAGING_TABLE = 'S_ARTICLES_538'

# Get data
# ----------------------------------------------------------------
conn = utils.create_connection('scraping538.db')
query = "SELECT * FROM " + STAGING_TABLE + " WHERE PROCESSED = 0"
data = pd.read_sql(query, con=conn)
data.drop("PROCESSED", axis=1, inplace=True)

# Clean data
# ----------------------------------------------------------------
# date
data['date'] = pd.to_datetime(data['date'])
data.rename(columns={"date": "DATE"}, inplace=True)

# date_hour
def items_to_upload():
    for item in config:
        if not config[item].get('processed'):
            yield item


def mark_as_processed(item, post_id):
    debug("Updating config for " + item)
    config.reload()
    config[item]['processed'] = 1
    config[item]['post id'] = post_id
    config[item]['posted on'] = str(datetime.now())
    config.write()


def main():
    for item in items_to_upload():
        debug('=' * 60)
        filename = utils.download(item)
        post_id = posterous.post(title=config[item]['title'],
                                 tags=config[item]['tags'],
                                 media=filename)
        mark_as_processed(item, post_id)


if __name__ == "__main__":
    utils.start_logging(__file__)
    main()
        # first timestep that didn't finish yet
        date = self.cursor.fetchone()[0]
        # assume all tasks are completed successfully: update all tasks
        self.cursor.execute("UPDATE tasks SET get_boundary=(?) WHERE timestep=(?)",
                            (True, date,))
        self.cursor.execute("UPDATE tasks SET wps=(?) WHERE timestep=(?)",
                            (True, date,))
        self.cursor.execute("UPDATE tasks SET wrf=(?) WHERE timestep=(?)",
                            (True, date,))
        # get all tasks
        self.cursor.execute("SELECT * FROM tasks WHERE timestep=(?)", (date,))
        tasks = self.cursor.fetchall()
        # verify all tasks were completed successfully: set timestep to pass
        if all(item is True for item in tasks[0][1:]):
            # all subtasks completed
            self.cursor.execute("UPDATE steps SET pass=(?) WHERE timestep=(?)",
                                (True, date,))
        # commit changes
        self.connection.commit()


if __name__ == "__main__":
    global logger
    logger = utils.start_logging('test.log')
    db = database()
    db._new_database()
    #db._connect_to_database()
    start_date = datetime.datetime(2014, 07, 16, 0)
    end_date = datetime.datetime(2014, 07, 20, 0)
    db.create_list_datetimes(start_date, end_date, nhours=1)
    db.testcode()
    #db._add_timesteps_to_db()
    db._close_connection()
    exit()
    def _archive_output(self, current_time, thours, domain):
        '''
        rename unipost.exe output to wrfpost_d0${domain}_time.grb and archive
        '''
        import shutil
        # verify that domain is an int
        if not isinstance(domain, int):
            message = 'domain id should be an integer'
            logger.error(message)
            raise IOError(message)
        # define original and destination filename
        origname = 'WRFPRS%02d.tm00' % thours
        outname = 'wrfpost_d%02d_%s.grb' % (domain, current_time)
        # rename file and move to archive dir
        shutil.move(os.path.join(config['post_dir'], origname),
                    os.path.join(config['upp_archive_dir'], outname))
        # check if file is indeed archived
        utils.check_file_exists(os.path.join(config['upp_archive_dir'], outname))


if __name__ == "__main__":
    logger = utils.start_logging('test.log')
    postprocess = upp()
    wrfout_files = glob.glob(os.path.join(config['wrf_run_dir'], 'wrfout_d01*'))
    postprocess.run_unipost_file(wrfout_files[0], use_t0=True)
    [postprocess.run_unipost_file(f) for f in wrfout_files[1:]]
    #postprocess.run_unipost_file(
    #    '/home/WUR/haren009/sources/WRFV3/run/wrfout_d01_2014-07-16_00:00:00')
def run(opts):
    # Define logging and start logging
    logname = os.path.basename(opts.config) + '.log'
    utils.start_logging(filename=logname, level=opts.log)
    localtime = utils.getCurrentTimeAsAscii()
    t0 = time.time()
    msg = os.path.basename(__file__) + ' script starts at %s.' % localtime
    print msg
    logging.info(msg)

    # Parse xml configuration file
    data = ET.parse(opts.config).getroot()

    # Database connection
    connection, cursor = utils.connectToDB(opts.dbname, opts.dbuser,
                                           opts.dbpass, opts.dbhost,
                                           opts.dbport)

    # get offset and srid of the background defined in the conf file
    (bgOffsetX, bgOffsetY, bgOffsetZ, bgSRID) = getBackgroundOffset(data, cursor)
    bgOffset = (bgOffsetX, bgOffsetY, bgOffsetZ)

    # Process updates
    updateAOS = data.xpath('//*[@status="updated"]')
    # loop over all updates found in the xml config file
    for ao in updateAOS:
        #(aoType, proto, uniqueName, siteId, activeobjectId, objectNumber) = \
        #    getDetails(ao)
        uniqueName = ao.get('uniqueName')
        (aoType, itemId, rawDataItemId, objectId, labelName) = \
            utils.decodeOSGActiveObjectUniqueName(cursor, uniqueName)
        if aoType == None:
            msg = 'Ignoring operation on %s. Could not decode uniqueName' % uniqueName
            print msg
            logging.warning(msg)
        else:
            # check if the object is in the DB
            osgLocationId = getOSGLocationId(cursor, aoType, labelName, itemId,
                                             objectId, rawDataItemId)
            if osgLocationId != None:
                # update the DB with the information in the xml config file
                if aoType == utils.AO_TYPE_LAB:
                    # Some other params may have changed in the label
                    msg = 'Updating label %s' % labelName
                    print msg
                    logging.info(msg)
                    deleteOSG(cursor, aoType, labelName)
                    osgLocationId = insertOSGLocation(cursor, ao.getchildren()[0],
                                                      bgSRID, bgOffset)
                    insertDB(cursor, 'OSG_LABEL',
                             ('osg_label_name', 'osg_location_id', 'text', 'red',
                              'green', 'blue', 'rotate_screen', 'outline', 'font'),
                             (labelName, osgLocationId, ao.get('labelText'),
                              ao.get('labelColorRed'), ao.get('labelColorGreen'),
                              ao.get('labelColorBlue'), ao.get('labelRotateScreen'),
                              ao.get('outline'), ao.get('Font')))
                else:
                    msg = 'Updating OSG location %d from %s' % (osgLocationId,
                                                                uniqueName)
                    print msg
                    logging.info(msg)
                    updateOSGLocation(cursor, osgLocationId, ao.getchildren()[0],
                                      bgSRID, bgOffset)
            else:
                if aoType == utils.AO_TYPE_OBJ:
                    # It is a bounding that has been moved and it is not
                    # currently in the DB. Let's insert it!
                    msg = 'Insert missing OSG_ITEM_OBJECT (%s,%s)' % (itemId, objectId)
                    print msg
                    logging.info(msg)
                    osgLocationId = insertOSGLocation(cursor, ao.getchildren()[0],
                                                      bgSRID, bgOffset)
                    insertDB(cursor, 'OSG_ITEM_OBJECT',
                             ('item_id', 'object_number', 'osg_location_id'),
                             (itemId, objectId, osgLocationId))
                else:
                    # log error if object is not found in DB
                    msg = 'Update not possible. OSG_ITEM_OBJECT from %s not found in DB' % uniqueName
                    print msg
                    logging.error(msg)

    # Process deletes (only possible for site objects)
    deleteAOS = data.xpath('//*[@status="deleted"]')
    # loop over all deletes found in the xml config file
    for ao in deleteAOS:
        uniqueName = ao.get('uniqueName')
        (aoType, itemId, rawDataItemId, objectId, labelName) = \
            utils.decodeOSGActiveObjectUniqueName(cursor, uniqueName)
        if aoType == None:
            msg = 'Ignoring operation on %s. Could not decode uniqueName' % uniqueName
            print msg
            logging.warning(msg)
        else:
            if aoType in (utils.AO_TYPE_OBJ, utils.AO_TYPE_LAB):
                # check if the object is in the DB
                osgLocationId = getOSGLocationId(cursor, aoType, labelName,
                                                 itemId, objectId)
                if osgLocationId != None:
                    # Delete the OSG-related entries from the DB
                    msg = 'Deleting OSG related entries for %s' % uniqueName
                    print msg
                    logging.info(msg)
                    deleteOSG(cursor, aoType, labelName, itemId, objectId)
                else:
                    # log error if object is not found in DB
                    msg = 'Not possible to delete. OSG_ITEM_OBJECT from %s not found in DB. Maybe already deleted?' % uniqueName
                    print msg
                    logging.warning(msg)
            else:
                # log error if trying to delete a non-site object
                msg = 'Ignoring delete in %s: Meshes, pictures and PCs can not be deleted' % uniqueName
                print msg
                logging.error(msg)

    # Process new objects (only possible for site objects)
    newAOS = data.xpath('//*[@status="new"]')
    # loop over all new objects found in the xml config file
    for ao in newAOS:
        uniqueName = ao.get('uniqueName')
        (aoType, itemId, rawDataItemId, objectId, labelName) = \
            utils.decodeOSGActiveObjectUniqueName(cursor, uniqueName)
        if aoType == None:
            msg = 'Ignoring operation on %s. Could not decode uniqueName' % uniqueName
            print msg
            logging.warning(msg)
        else:
            if aoType in (utils.AO_TYPE_OBJ, utils.AO_TYPE_LAB):
                # check if the object is in the DB
                osgLocationId = getOSGLocationId(cursor, aoType, labelName,
                                                 itemId, objectId)
                if osgLocationId != None:
                    # log error if the new object is already in the DB
                    msg = 'OSG_ITEM_OBJECT from %s already in DB. Ignoring add' % uniqueName
                    print msg
                    logging.warning(msg)
                else:
                    osgLocationId = insertOSGLocation(cursor, ao.getchildren()[0],
                                                      bgSRID, bgOffset)
                    if aoType == utils.AO_TYPE_OBJ:
                        # add object to the DB
                        if objectId == utils.ITEM_OBJECT_NUMBER_ITEM:
                            msg = 'Adding missing ITEM %s' % objectId
                            print msg
                            logging.info(msg)
                            insertDB(cursor, 'ITEM', ('item_id', 'background'),
                                     (itemId, False))
                        msg = 'Adding ITEM_OBJECT (%d,%d)' % (itemId, objectId)
                        print msg
                        logging.info(msg)
                        insertDB(cursor, 'ITEM_OBJECT', ('item_id', 'object_number'),
                                 (itemId, objectId))
                        insertDB(cursor, 'OSG_ITEM_OBJECT',
                                 ('item_id', 'object_number', 'osg_location_id'),
                                 (itemId, objectId, osgLocationId))
                    else:
                        # add label to the DB
                        msg = 'Adding label %s' % uniqueName
                        print msg
                        logging.info(msg)
                        insertDB(cursor, 'OSG_LABEL',
                                 ('osg_label_name', 'osg_location_id', 'text',
                                  'red', 'green', 'blue', 'rotate_screen',
                                  'outline', 'font'),
                                 (labelName, osgLocationId, ao.get('labelText'),
                                  ao.get('labelColorRed'), ao.get('labelColorGreen'),
                                  ao.get('labelColorBlue'), ao.get('labelRotateScreen'),
                                  ao.get('outline'), ao.get('Font')))
            else:
                # log error if trying to add a non-site object
                msg = 'Ignoring new in %s: Meshes, pictures and PCs can not be added' % uniqueName
                print msg
                logging.error(msg)

    # Process the cameras (the DEF CAMs are added for all objects and can not be deleted or updated)
    cameras = data.xpath('//camera[not(starts-with(@name,"' +
                         utils.DEFAULT_CAMERA_PREFIX + '"))]')
    # Delete all previous cameras and related entries
    deleteCameras(cursor)
    # add all cameras
    for camera in cameras:
        name = camera.get('name')
        itemId = None
        if name.count(utils.USER_CAMERA):
            try:
                itemId = int(name[name.index(utils.USER_CAMERA) +
                                  len(utils.USER_CAMERA):].split('_')[0])
            except:
                msg = 'Incorrect name %s for a ITEM camera' % name
                print msg
                logging.warn(msg)
                itemId = None
        msg = 'Adding camera %s' % name
        print msg
        logging.info(msg)
        osgLocationId = insertOSGLocation(cursor, camera, bgSRID, bgOffset)
        insertDB(cursor, 'OSG_CAMERA', ('osg_camera_name', 'osg_location_id'),
                 (name, osgLocationId))
        if itemId != None:
            insertDB(cursor, 'OSG_ITEM_CAMERA', ('item_id', 'osg_camera_name'),
                     (itemId, name))

    # close DB connection
    utils.closeConnectionDB(connection, cursor)

    elapsed_time = time.time() - t0
    msg = 'Finished. Total elapsed time: %.02f seconds. See %s' % (elapsed_time, logname)
    print(msg)
    logging.info(msg)
        '''
        check if outputdir exists and create if not
        '''
        if not os.path.exists(self.outputdir):
            os.makedirs(self.outputdir)


if __name__ == "__main__":
    # define argument menu
    description = 'Get data KNMI reference stations'
    parser = argparse.ArgumentParser(description=description)
    # fill argument groups
    parser.add_argument('-o', '--outputdir', help='Data output directory',
                        default=os.path.join(os.getcwd(), 'KNMI'),
                        required=False)
    parser.add_argument('-s', '--stationid', help='Station id',
                        default='', required=False, action='store')
    parser.add_argument('-c', '--csvfile', help='CSV data file',
                        required=True, action='store')
    parser.add_argument('-k', '--keep', help='Keep downloaded files',
                        required=False, action='store_true')
    parser.add_argument('-l', '--log', help='Log level',
                        choices=utils.LOG_LEVELS_LIST,
                        default=utils.DEFAULT_LOG_LEVEL)
    # extract user entered arguments
    opts = parser.parse_args()
    # define logger
    logname = os.path.basename(__file__) + '.log'
    logger = utils.start_logging(filename=logname, level=opts.log)
    # process data
    get_knmi_reference_data(opts)
def cli_main():
    log_file = 'stdout'  # log to stdout rather than to a named file
    log_level = ''
    utils.start_logging(log_file=log_file,
                        log_level='INFO',
                        verbose=False)  # start logging

    parse_arguments()
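# A conventional entry-point guard (an assumption; the original snippet ends
# at the parse_arguments() call) showing how cli_main would typically be wired
# up when the module is run as a script:
if __name__ == '__main__':
    cli_main()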