def __init__(self, dataset_cfg, feature_params, collect_wavs=False, verbose=False):

    # parent init
    super().__init__(dataset_cfg, feature_params, collect_wavs=collect_wavs, verbose=verbose)

    # feature extractor
    self.feature_extractor = FeatureExtractor(feature_params=self.feature_params)

    # short vars
    self.N = self.feature_extractor.N
    self.hop = self.feature_extractor.hop

    # create plot paths if they do not already exist
    create_folder(list(self.plot_paths.values()))

    # recreate if requested or if wav folders are missing/empty
    if self.dataset_cfg['recreate'] or not check_folders_existance(self.wav_folders, empty_check=True):

        # delete old data
        delete_files_in_path(self.wav_folders, file_ext=self.dataset_cfg['file_ext'])

        # create wav folders
        create_folder(self.wav_folders)

        # create sets (specific to dataset)
        self.create_sets()

    # get audio and annotation files from sets
    self.get_audiofiles()
    self.get_annotation_files()
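Every snippet in this section depends on a create_folder helper that is never defined here. Most call sites pass either a single path string or a list of paths (the Dataiku census snippets below instead use a three-argument common.create_folder(base, name, flag), and the prepack downloader expects the created path back, so those are distinct helpers). A minimal sketch consistent with the list-or-string call sites, assuming os.makedirs semantics; this is inferred from usage, not any project's actual code:

import os

def create_folder(paths):
    """Create one folder or a list of folders, skipping any that already exist (sketch only)."""
    for path in paths if isinstance(paths, (list, tuple)) else [paths]:
        os.makedirs(path, exist_ok=True)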
def __init__(self, screen, screen_size, fps, capture_path='./ignore/capture/', frame_path='frames/', frame_name='frame', enabled=True):

    # params
    self.screen = screen
    self.screen_size = screen_size
    self.fps = fps

    # paths
    self.capture_path = capture_path
    self.frame_path = frame_path
    self.frame_name = frame_name

    # enabled
    self.enabled = enabled

    # delete old data
    delete_png_in_path(self.capture_path + self.frame_path)

    # create folder for captured frames
    create_folder([self.capture_path + self.frame_path])

    # vars
    self.actual_frame_num = 0
    self.frame_container = []

    # downsample of fps
    self.downsample = 2
    self.downsample_count = 0
def test_move_folder():
    setup_test_files()

    # create a folder inside the tmp dir, then move it into the working dir
    create_folder(f'{test_folder_path}/FolderToBeMoved')
    move_folder(f'{test_folder_path}/FolderToBeMoved', f'{test_folder_path}/working/FolderToBeMoved')

    # set up folders lists
    tmp_folders = []
    working_folders = []

    # find all folders in the tmp dir and append them to the folders list
    for folder in (objects for objects in os.listdir(f'{test_folder_path}/') if os.path.isdir(os.path.join(f'{test_folder_path}/', objects))):
        tmp_folders.append(folder)

    # find all folders in the working dir and append them to the folders list
    for folder in (objects for objects in os.listdir(f'{test_folder_path}/working/') if os.path.isdir(os.path.join(f'{test_folder_path}/working/', objects))):
        working_folders.append(folder)

    # the tmp dir should now only contain 'working', which holds the moved folder
    assert tmp_folders[0] == 'working'
    assert len(tmp_folders) == 1
    assert working_folders[0] == 'FolderToBeMoved'
    assert len(working_folders) == 1

    teardown_test_files()
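move_folder itself is not shown; a minimal sketch consistent with the assertions above, built on shutil.move and assuming the target's parent folder may need creating (not the project's actual implementation):

import os
import shutil

def move_folder(source_path, target_path):
    # create the target's parent folder if missing, then move (sketch only)
    os.makedirs(os.path.dirname(target_path), exist_ok=True)
    shutil.move(source_path, target_path)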
def __init__(self):
    """Generic initialization code."""

    # session folder (create if missing)
    self.session_folder = '../sessions'
    create_folder(self.session_folder)

    # log folder (create if missing)
    self.log_folder = f'{self.session_folder}/logs'
    create_folder(self.log_folder)

    # work folder (create and clear)
    self.work_folder = f'{self.session_folder}/{script_name()}/work'
    clear_folder(self.work_folder)

    # configuration engines
    self.config = None
    self.option = None

    # database
    self.database = None
    self.target_db_conn = None

    # parameter driven
    self.dataset_id = ''
    self.table_name = ''

    # since we start logging before we read config/options we log to a known path vs a dataset-specific path
    log_setup(log_file_name=f'{self.log_folder}/{script_name()}.log')
    log_session_info()
def __init__(self, screen, cfg_game, frame_name='frame', root_path='./'):

    # arguments
    self.screen = screen
    self.cfg_game = cfg_game
    self.frame_name = frame_name
    self.root_path = root_path

    # shortcuts
    self.screen_size = cfg_game['screen_size']
    self.fps = cfg_game['fps']

    # paths
    self.paths = dict((k, self.root_path + v) for k, v in self.cfg_game['paths'].items())

    # delete old data
    delete_files_in_path([self.paths['frame_path']], file_ext='.png')

    # create folder for captured frames
    create_folder(list(self.paths.values()))

    # vars
    self.actual_frame_num = 0
    self.frame_container = []

    # downsample of fps
    self.downsample = 2
    self.downsample_count = 0
def extract_features(self):
    """ extract mfcc features and save them """

    print("\n--feature extraction:")

    # create folder structure
    create_folder(self.feature_folders)

    for i, (set_name, wavs, annos) in enumerate(zip(self.set_names, self.set_audio_files, self.set_annotation_files)):

        print("{}) extract set: {} with label num: {}".format(i, set_name, len(wavs)))

        # examples with splits
        n_examples = int(self.dataset_cfg['n_examples'] * self.dataset_cfg['split_percs'][i])

        # extract data (mfcc features or raw samples)
        if self.feature_params['use_mfcc_features']:
            x, y, t, index = self.extract_mfcc_data(wavs=wavs, annos=annos, n_examples=n_examples, set_name=set_name)
        else:
            x, y, t, index = self.extract_raw_data(wavs=wavs, annos=annos, n_examples=n_examples, set_name=set_name)

        # add noise if requested
        if self.dataset_cfg['add_noise'] and self.feature_params['use_mfcc_features']:
            x, y, index = self.add_noise_to_dataset(x, y, index, n_examples)

        # print label stats
        self.label_stats(y)

        # save mfcc data file
        np.savez(self.feature_files[i], x=x, y=y, t=t, index=index, params=self.feature_params)
        print("--save data to: ", self.feature_files[i])
def run(self):
    """
    Options
    --onetime[=1]  run once, then exit; use if called by an external scheduler.
    --nowait[=1]   run immediately without waiting for scheduler to determine execution.
    """

    # exit if __init__ didn't find a valid project file
    if not self.project_name:
        return

    # display application banner
    app_name = script_name()
    print(f'UDP {app_name.title()} {self.project_name}')
    copyright_year = f'{now():%Y}'
    copyright_message = f'Copyright (c) 2018-{copyright_year} Alterra Mountain Company, Inc.'
    print(copyright_message)

    # make sure root sessions folder exists
    create_folder(self.session_folder)

    # since we start logging before we read config/options we log to a known path vs a dataset-specific path
    log_setup(log_file_name=f'{self.session_folder}/{self.project_name}.log')
    log_session_info()

    # common setup
    self.setup()

    # application specific startup logic
    self.start()

    # scheduling behavior based on --onetime, --nowait options
    if self.option('onetime') == '1':
        # one-time run; use when this script is being called by an external scheduler
        logger.info('Option(onetime=1): executing once')
        self.main()
    else:
        if self.option('nowait') == '1':
            # no-wait option; execute immediately without waiting for scheduler to initiate
            logger.info('Option(nowait=1): executing immediately, then following regular schedule')
            self.main()

        # standard wait for scheduled time slot and run logic
        while True:
            self.progress_message('waiting for next job ...')
            if self.schedule.wait():
                self.main()
                if self.option('scheduled_onetime') == '1':
                    logger.info('Option(scheduled_onetime=1): ran once at first scheduled timeslot')
                    break
            else:
                break

    self.cleanup()
def __init__(self, classifier, mic_params, is_audio_record=False, root_path='./'):

    # arguments
    self.classifier = classifier
    self.mic_params = mic_params
    self.is_audio_record = is_audio_record
    self.root_path = root_path

    # plot path
    self.plot_path = self.root_path + self.mic_params['plot_path']

    # create folder for plot path
    create_folder([self.plot_path])

    # shortcuts
    self.feature_params = classifier.feature_params

    # feature extractor
    self.feature_extractor = FeatureExtractor(self.feature_params)

    # windowing params
    self.N, self.hop = self.feature_extractor.N, self.feature_extractor.hop

    # queue
    self.q = queue.Queue()

    # collector
    self.collector = Collector(N=self.N, hop=self.hop, frame_size=self.feature_params['frame_size'], update_size=self.mic_params['update_size'], frames_post=self.mic_params['frames_post'], is_audio_record=self.is_audio_record)

    # device
    self.device = sd.default.device[0] if not self.mic_params['select_device'] else self.mic_params['device']

    # determine downsample factor
    self.downsample = self.mic_params['fs_device'] // self.feature_params['fs']

    # get input devices
    self.input_dev_dict = self.extract_devices()

    # show devices
    print("\ndevice list: \n", sd.query_devices())
    print("\ninput devs: ", self.input_dev_dict.keys())

    # stream
    self.stream = None

    # change device flag
    self.change_device_flag = False
def create_ml_folders(self):
    """ create all necessary folders for ml """

    # create folders
    create_folder(list(self.cfg['ml']['paths'].values()) + [self.model_path])
def saveImage(redis_service, host, key):
    image = redis_service.rpop(key)
    if image:
        print('Server: %s, got image info: %s' % (host, image))
        image = json.loads(image)
        common.create_folder(image_dir + '/img/' + image['mainid'])
        downloadImage(image['filename'], image['url'])
    else:
        print('Server: %s image data is empty' % host)
def create(self, resource):
    """Create a container within resource's storage account."""
    self._load_resource(resource)
    blob_folder = self._blob_folder()
    if is_folder(blob_folder):
        logger.warning(self._context("Container already exists"))
    else:
        # create new container
        logger.info(self._context("Creating container"))
        create_folder(blob_folder)
    self.disconnect()
    return is_folder(blob_folder)
def create_audio_dataset_folders(self):
    """ create all necessary folders for audio dataset """

    # mfcc paths for output
    mfcc_paths = [p + self.param_path_audio_dataset for p in list(self.cfg['audio_dataset']['data_paths'].values())]

    # create folders
    create_folder(self.wav_folders_audio_dataset + mfcc_paths + list(self.cfg['audio_dataset']['plot_paths'].values()))
def create_my_recording_folders(self):
    """ create all necessary folders for my recordings """

    # output path
    output_path = self.cfg['my_recordings']['out_path_root'] + self.param_path_my_recordings

    # create folders
    create_folder([self.cfg['my_recordings']['plot_path'], self.cfg['my_recordings']['wav_path'], output_path])
def cumulative_chart_to_file(data, out_folder, category_to_skip):
    c.create_folder(join(out_folder, CHART_FOLDER))
    out_and_in = data.collapsed_data_by_month(category_to_skip)

    # monthly difference: income minus spending
    diff = [b - a for (a, b) in out_and_in]

    # running total of the monthly differences
    cumulative = [diff[0]]
    for i in range(1, len(diff)):
        cumulative.append(cumulative[i - 1] + diff[i])

    transposed = list(zip(*out_and_in))
    m_in = list(transposed[1])
    m_out = [-x for x in transposed[0]]

    file_name = draw_chart(TOTAL_CHART_NAME, data.months,
                           (m_in, m_out, diff, cumulative),
                           ('in', 'out', 'diff', TOTAL_LABEL),
                           out_folder, cstyle=LightSolarizedStyle)
    return join(CHART_FOLDER, file_name)
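As a worked example of the arithmetic above: with out_and_in = [(100, 150), (80, 60)] as (spending, income) pairs per month, diff comes out as [50, -20] and cumulative as [50, 30], while the plotted series are m_in = [150, 60] and m_out = [-100, -80].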
def download_prepack_file(build_number, language):
    build_dst = const.HF_Working_Folder + "\\Build\\" + language + "\\B" + build_number
    filename = 'Prepack.zip'
    if not os.path.exists(build_dst + "\\" + filename):
        if not os.path.exists(build_dst):
            build_dst = common.create_folder(build_dst)
        prepack_download_from_ftp(filename, build_dst, build_number, language)
def run(self):
    common.create_folder(config.image_dir + '/img')
    while True:
        image = self.redis.rpop('images')
        if image:
            image = json.loads(image)
            print(image)
            common.create_folder(config.image_dir + '/img/' + image['mainid'])
            #common.create_folder(config.image_dir + '/' + image['mainid'])
            self.__saveImage(image['filename'], image['url'])
            time.sleep(0.5)
        else:
            time.sleep(10)
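The consumers above rpop JSON payloads from a Redis list, which implies a producer lpush-ing matching records. A minimal sketch of that producer side, with hypothetical field values (the real producer is not shown):

import json
import redis

r = redis.Redis()  # connection details are an assumption

# hypothetical payload carrying the fields the consumer reads
payload = {'mainid': '12345', 'filename': '/img/12345/001.jpg', 'url': 'http://example.com/001.jpg'}
r.lpush('images', json.dumps(payload))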
def test_create_folder():
    setup_test_files()
    create_folder(f'{test_folder_path}/createdfolder')

    # set up folders list
    folders = []

    # find all folders in the tmp dir and append them to the folders list
    for folder in (objects for objects in os.listdir(f'{test_folder_path}/') if os.path.isdir(os.path.join(f'{test_folder_path}/', objects))):
        folders.append(folder)

    # sort the list alphabetically
    folders.sort()

    # assert that the directories we expect exist
    assert folders[0] == 'createdfolder'
    assert folders[1] == 'working'
    assert len(folders) == 2

    teardown_test_files()
def __init__(self, tenant):
    # tenant
    self.tenant = force_local_path(tenant.lower())

    # make sure we have a tenant
    create_folder(self.tenant)
    self.ad_file_name = force_file_ext(f'{self.tenant}/ad', 'json')

    # start in a non-authenticated state
    self.identity = None
    self.is_authenticated = False

    # load our directory
    if is_file(self.ad_file_name):
        self._load()
    else:
        self.identities = dict()
def setup_test_files():
    # ensure clean (empty) ../tmp folder
    teardown_test_files()

    # set up file names
    create_folder(test_folder_path)
    readonly_file_name = f'{test_folder_path}/readonly.txt'
    readwrite_file_name = f'{test_folder_path}/readwrite.txt'

    # create a read/write file via common's save_text()
    save_text(readwrite_file_name, 'Hello world')

    # create a read-only file; chmod after the file is closed
    save_text(readonly_file_name, 'Hello world')
    os.chmod(readonly_file_name, S_IREAD | S_IRGRP | S_IROTH)

    # create a working dir in tmp folder
    create_folder(f'{test_folder_path}/working')
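teardown_test_files has to delete the read-only file created above, and a plain shutil.rmtree fails on read-only entries on Windows. A sketch of how it might clear the flag in an error handler; this is an assumption, not the suite's actual code:

import os
import shutil
import stat

def teardown_test_files():
    def make_writable(func, path, exc_info):
        # make the offending path writable, then retry the failed operation (sketch only)
        os.chmod(path, stat.S_IWRITE)
        func(path)

    if os.path.isdir(test_folder_path):
        shutil.rmtree(test_folder_path, onerror=make_writable)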
def put(self, source_file_name, blob_name):
    """Upload source file name to blob."""
    if not is_file(source_file_name):
        warning_message = f"Source file does not exist ({source_file_name})"
        logger.warning(self._context(warning_message, blob_name))
        is_success = False
    else:
        logger.debug(self._context(f"Putting {source_file_name}", blob_name))

        # build blob target file and folder names
        blob_folder = self._blob_folder()
        target_file_name = f"{blob_folder}/{blob_name}"
        target_folder = just_path(target_file_name)

        # make sure the blob's target folder exists
        create_folder(target_folder)

        # then copy source file to blob container
        copy_file_if_exists(source_file_name, target_file_name)
        is_success = True

    return is_success
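just_path and copy_file_if_exists are project helpers not shown here; minimal sketches consistent with how put() uses them (assumptions, not the project's actual code):

import os
import shutil

def just_path(file_name):
    # return the folder portion of a file path (sketch only)
    return os.path.dirname(file_name)

def copy_file_if_exists(source_file_name, target_file_name):
    # copy source to target when the source exists; otherwise do nothing (sketch only)
    if os.path.isfile(source_file_name):
        shutil.copy2(source_file_name, target_file_name)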
def run(self, *args, **kwargs):
    """
    Options
    --onetime[=1]  run once, then exit; use if called by an external scheduler.
    --nowait[=1]   run immediately without waiting for scheduler to determine execution.
    """

    # make sure root sessions folder exists
    create_folder('../sessions')

    # TODO: We start logging before we read config and options so we don't know datapool or anything else.
    # TODO: We should log to a default app log and then SWITCH LOG file over after we process options
    # TODO: and config files ??? (2018-09-25)
    log_setup(log_file_name=f'../sessions/{script_name()}.log')
    log_session_info()

    self.setup(*args, **kwargs)
    self.start()

    # scheduling behavior based on --onetime, --nowait options
    if self.option('onetime') == '1':
        # one-time run; use when this script is being called by an external scheduler
        logger.info('Option(onetime=1): executing once')
        self.main()
    else:
        if self.option('nowait') == '1':
            # no-wait option; execute immediately without waiting for scheduler to initiate
            logger.info('Option(nowait=1): executing immediately, then following regular schedule')
            self.main()

        # standard wait for scheduled time slot and run logic
        while True:
            if self.schedule.wait():
                self.main()
            else:
                break

    self.cleanup()
def category_chart_to_file(data, out_folder):
    file_names = []
    c.create_folder(join(out_folder, CHART_FOLDER))
    for category in data.categories:
        money = data.collapsed_data_by_month_for(category)
        money_out, money_in, entries_out, entries_in = (list(t) for t in zip(*money))
        logging.debug("__CHART_FOR " + category + "\n\t" + str(data.months) +
                      "\n\t" + str(money_in) + "\n\t" + str(money_out))
        if all(value == 0 for value in money_in):
            # spending only: chart the outgoing money by itself
            file_name = draw_chart(category, data.months,
                                   (money_out,), (MONEY_OUT_LABEL,),
                                   out_folder, cstyle=LightColorizedStyle,
                                   det_lists=(entries_out,))
        else:
            # negate spending so income and spending plot on opposite sides
            money_out = [-y for y in money_out]
            file_name = draw_chart(category, data.months,
                                   (money_in, money_out),
                                   (MONEY_IN_LABEL, MONEY_OUT_LABEL),
                                   out_folder,
                                   det_lists=(entries_in, entries_out))
        file_names.append(join(CHART_FOLDER, file_name))
    return sorted(file_names)
def setup(self):
    """Generic setup code."""

    # load standard config
    self.config = ConfigSectionKey('../conf', '../local')
    self.config.load('bootstrap.ini', 'bootstrap')
    self.config.load('init.ini')
    self.config.load('connect.ini')

    # load utility specific options using
    # env variable = UDP_<SCRIPT-NAME>; Option() retrieves command line options
    self.option = Option(f'udp_{script_name()}')

    # create/clear work folder
    self.work_folder = f'{self.session_folder}/{script_name()}/work'
    create_folder(self.work_folder)

    # display application banner
    # TODO: This should be a banner() method
    print(f'UDP {script_name()} utility')
    print('Alterra Unified Data Platform')
    copyright_year = f'{now():%Y}'
    copyright_message = f'Copyright (c) 2018-{copyright_year} Alterra Mountain Company, Inc.'
    print(copyright_message)
def generate_rows(self, dataset_schema=None, dataset_partitioning=None, partition_id=None, records_limit=-1):

    path_datadir_tmp = os.getenv("DIP_HOME") + '/tmp/'
    FOLDER_NAME = 'tmp_census_us_' + self.P_CENSUS_CONTENT
    P_CENSUS_TYPE = self.P_CENSUS_CONTENT[:3]
    CENSUS_TYPE = str(census_resources.dict_vintage_[self.P_CENSUS_CONTENT[:3]])
    fields_list = self.P_census_fields.split(',')

    #----------------------------------------- BASE FOLDER
    logger.info('1/6 Creating base folders...')
    common.create_folder(path_datadir_tmp, FOLDER_NAME, False)
    common.create_folder(path_datadir_tmp + '/' + FOLDER_NAME + '/', self.P_CENSUS_LEVEL, False)

    #----------------------------------------- SOURCE HARVESTER
    state_list_ = self.P_state_list_str.split(',')
    state_conversion = common.state_to_2letters_format(self.P_STATES_TYPE_NAME, state_list_)
    state_list = state_conversion[0]
    state_list_rejected = state_conversion[1]
    dict_states = state_conversion[2]

    s_found = len(state_list)
    s_rejected = len(state_list_rejected)

    logger.info('----------------------------------------')
    logger.info('First diagnostic on input dataset')
    logger.info('----------------------------------------')

    if s_found > 0:
        logger.info('States expected to be processed if enough records for feature selection:')
        logger.info(state_list)
        logger.info('States rejected:')
        if s_rejected < 60:
            logger.info(state_list_rejected)
        else:
            logger.info('...too many elements rejected for displaying it in the log...')

        if self.P_USE_PREVIOUS_SOURCES is False:
            logger.info('2/6 Collecting US Census Data...')
        else:
            logger.info('2/6 Re-using US Census Data if available...')

        sources_collector = common.us_census_source_collector(
            self.P_USE_PREVIOUS_SOURCES, P_CENSUS_TYPE, self.P_CENSUS_CONTENT,
            self.P_CENSUS_LEVEL, path_datadir_tmp, FOLDER_NAME, state_list, dict_states)

        sumlevel_val = sources_collector[0]
        fdef_dir = sources_collector[1]
        geo_header_file = sources_collector[2]
        dict_pattern_files = sources_collector[3]

        geo_header_file_dir = fdef_dir + '/' + geo_header_file
        geo_header = pd.read_excel(geo_header_file_dir, sheet_name=0, header=0)  # first sheet

        census_level_code_len = census_resources.dict_level_corresp['v1'][self.P_CENSUS_LEVEL]['code_len']

        logger.info('4/6 Generating census...')
        final_output_df = pd.DataFrame()

        for state in state_list:
            logger.info('Processing this state: %s' % (state))
            state_dir = path_datadir_tmp + FOLDER_NAME + '/' + state

            if self.P_CENSUS_LEVEL in ('TRACT', 'BLOCK_GROUP'):
                ziptocollect = dict_pattern_files['v1']['TB']
                state_dir_level = state_dir + '/' + 'TRACT_BG_SEG'
            else:
                ziptocollect = dict_pattern_files['v1']['OT']
                state_dir_level = state_dir + '/' + 'NO_TRACT_BG_SEG'

            ustate = state.upper()
            state_name = dict_states[state]['attributes']['state_fullname_w1']
            state_number = dict_states[state]['attributes']['state_2digits']

            vint = census_resources.dict_vintage_[P_CENSUS_TYPE][self.P_CENSUS_CONTENT]
            master_segment_file = state_dir_level + '/' + vint['master_segment_file_pattern'] + vint['vintage_pattern'] + state + '.csv'

            geo_source_df = pd.read_csv(master_segment_file, sep=',', header=None, names=geo_header.columns)
            geo_level_df = geo_source_df[geo_source_df['SUMLEVEL'].isin(sumlevel_val)].copy()
            geo_level_df['GEOID_DKU'] = geo_level_df['GEOID'].map(lambda x: x.split('US')[1])
            geo_level_df[self.P_CENSUS_LEVEL] = geo_level_df['GEOID_DKU'].map(lambda x: x[:census_level_code_len])

            keep_cols = ['FILEID', 'SUMLEVEL', 'GEOID_DKU', 'STUSAB', 'LOGRECNO']
            geo_level_df = geo_level_df[keep_cols]
            geo_level_df['STUSAB'] = geo_level_df['STUSAB'].map(lambda x: x.lower())  # basically the state in lower case
            del geo_level_df['FILEID']
            del geo_level_df['SUMLEVEL']

            # count the estimation segment files available for this state
            n = 0
            for fr in os.listdir(state_dir_level):
                if fr.startswith(vint['segments_estimations_files_pattern']):
                    n += 1

            segment_list = []
            for i in range(1, n + 1):
                if i < 10:
                    segment_list.append('000' + str(i))
                if i in range(10, 100):
                    segment_list.append('00' + str(i))
                if i >= 100:
                    segment_list.append('0' + str(i))

            nb_segments = len(segment_list)
            i = 0
            for segment_number in segment_list:
                i = i + 1
                logger.info('Processing segment: %s/%s' % (i, nb_segments))

                template_fields_def = census_resources.dict_vintage_[P_CENSUS_TYPE][self.P_CENSUS_CONTENT]['fields_definition']
                seq_folder_name = template_fields_def['folder_name']

                ## Some vintages like ACS52013 do not ship the template in its own folder.
                ## If no template folder, fall back to the geo header template folder.
                if seq_folder_name == '':
                    seq_folder_name = template_fields_def['geo_header_template_folder_name']

                try:
                    HEADER_PATH_FILE = fdef_dir + '/' + seq_folder_name + '/Seq' + str(int(segment_number)) + template_fields_def['seq_files_extension']
                    header_df = pd.read_excel(HEADER_PATH_FILE, sheet_name=0)  # sheet 0 = 'E'
                except Exception:
                    HEADER_PATH_FILE = fdef_dir + '/' + seq_folder_name + '/seq' + str(int(segment_number)) + template_fields_def['seq_files_extension']
                    header_df = pd.read_excel(HEADER_PATH_FILE, sheet_name=0)  # sheet 0 = 'E'

                ### Adjust the header to fit what we need.
                kh_list = ['FILEID', 'FILETYPE', 'STUSAB', 'CHARITER', 'SEQUENCE', 'LOGRECNO']
                f_list = [x for x in header_df.columns if x not in kh_list]
                E_list = [x + 'E' for x in f_list]
                newcolz_list = kh_list + E_list

                t_ = [c for c in newcolz_list if c in fields_list]
                if len(t_) > 0:
                    SEGMENT_PATH_FILE = state_dir_level + '/' + vint['segments_estimations_files_pattern'] + vint['vintage_pattern'] + state + segment_number + '000.txt'
                    segment_df = pd.read_csv(SEGMENT_PATH_FILE, sep=',', names=newcolz_list, low_memory=False)

                    out_list = kh_list + t_
                    out_list.remove('FILEID')
                    out_list.remove('FILETYPE')
                    out_list.remove('CHARITER')
                    out_list.remove('SEQUENCE')
                    segment_df = segment_df[out_list]

                    geo_level_df = pd.merge(left=geo_level_df, right=segment_df, how='inner',
                                            left_on=['STUSAB', 'LOGRECNO'], right_on=['STUSAB', 'LOGRECNO'])

            logger.info('-------------- volumes check------------------')
            logger.info(geo_level_df.groupby('STUSAB').size())
            logger.info('Check Tallies here :')
            logger.info('https://www.census.gov/geo/maps-data/data/tallies/tractblock.html')
            logger.info('----------------------------------------------')

            #del geo_level_df['STUSAB']
            del geo_level_df['LOGRECNO']
            if self.P_STATES_TYPE_NAME != 'state_2letters':
                geo_level_df[self.P_STATES_TYPE_NAME] = dict_states[state]['attributes'][self.P_STATES_TYPE_NAME]

            logger.info('5/6 Building final output...')
            final_output_df = pd.concat((final_output_df, geo_level_df), axis=0)

        if self.P_DELETE_US_CENSUS_SOURCES is True:
            logger.info('6/6 Removing US Census temp data from: %s' % (path_datadir_tmp + FOLDER_NAME))
            cmd = "rm -rf %s" % (path_datadir_tmp + FOLDER_NAME)
            os.system(cmd)
        else:
            logger.info('6/6 Keeping US Census data sources in: %s' % (path_datadir_tmp + FOLDER_NAME))
            for f in os.listdir(path_datadir_tmp + FOLDER_NAME):
                if not f.endswith('.zip'):
                    cmd = "rm -rf %s" % (path_datadir_tmp + FOLDER_NAME + '/' + f)
                    os.system(cmd)

        for i, line in final_output_df.iterrows():
            yield line.to_dict()

    else:
        logger.info('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
        logger.info('US Census CANNOT be built, no states available in your dataset...')
        logger.info('Check the following settings:')
        logger.info('-> are the states in the right format regarding the plugin set by the user?')
        logger.info('-> is the column really containing states?')
        logger.info('----------------------------------------')
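The three-branch padding loop above builds four-character segment ids ('0001', '0012', '0123'); for segment counts below 1000 it is equivalent to zero-padding directly:

segment_list = [str(i).zfill(4) for i in range(1, n + 1)]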
def main(self):
    db = None
    try:
        # get job id and table history
        job_history_file_name = f'{self.state_folder_name}/capture.job'
        job_history = JobHistory(job_history_file_name)
        job_history.load()
        job_id = job_history.job_id
        self.job_id = job_id
        logger.info(f'\nCapture job {job_id} for {self.namespace} ...')

        # track job (and table) stats
        self.stats = Stats(f'{self.work_folder_name}/job.log', namespace=self.namespace, job_id=job_id)
        self.stats.start('capture', 'job')

        # track overall job row count and file size
        self.job_row_count = 0
        self.job_file_size = 0

        # create/clear job folders
        create_folder(self.state_folder_name)
        clear_folder(self.work_folder_name)
        clear_folder(self.publish_folder_name)

        # connect to source database
        db = None
        db_engine = None
        if self.database.platform == 'postgresql':
            db = database.PostgreSQL(self.database)
            db_engine = database.Database('postgresql', db.conn)
        elif self.database.platform == 'mssql':
            db = database.MSSQL(self.database)
            db_engine = database.Database('mssql', db.conn)
        # cursor = db.conn.cursor()

        # determine current timestamp for this job's run
        # get current_timestamp() from source database with step back and fast forward logic
        current_timestamp = self.current_timestamp(db_engine)

        # process all tables
        self.stats.start('extract', 'step')
        for table_name, table_object in self.table_config.sections.items():
            table_history = job_history.get_table_history(table_name)
            self.process_table(db, db_engine, self.database.schema, table_name, table_object, table_history, current_timestamp)
        self.stats.stop('extract', self.job_row_count, self.job_file_size)

        # save interim job stats to work_folder before compressing this folder
        self.stats.stop('capture', self.job_row_count, self.job_file_size)
        self.stats.save()

        # compress work_folder files to publish_folder zip file
        self.compress_work_folder()

        # upload publish_folder zip file
        self.upload_to_objectstore()

        # save final stats for complete job run
        self.stats.stop('capture', self.job_row_count, self.job_file_size)
        self.stats.save(f'{self.state_folder_name}/last_job.log')
        self.stats.save()

        # update job_id and table histories
        job_history.save()

        # compress capture_state and save to capture objectstore for recovery
        self.save_recovery_state_file()

        # update schedule's poll message
        last_job_info = f'last job {self.job_id} on {datetime.datetime.now():%Y-%m-%d %H:%M}'
        schedule_info = f'schedule: {self.schedule}'
        self.schedule.poll_message = f'{script_name()}({self.namespace}), {last_job_info}, {schedule_info}'

    # force unhandled exceptions to be exposed
    except Exception:
        logger.exception('Unexpected exception')
        raise

    finally:
        # explicitly close database connection when finished with job
        with contextlib.suppress(Exception):
            db.conn.close()
    else:
        print('Server: %s image data is empty' % host)


def downloadImage(filename, url):
    try:
        filename = image_dir + filename
        print(url)
        request.urlretrieve(url, filename)
    except Exception as e:
        print(e)


global image_dir
image_dir = config.image_dir

# create folder
common.create_folder(image_dir + '/img')

threads = []
for redis_cfg in config.redis_services:
    thread = threading.Thread(target=getImage, args=(redis_cfg,))
    threads.append(thread)

# start all worker threads, then wait for them
for t in threads:
    t.daemon = True
    t.start()

for t in threads:
    t.join()
from pathlib import Path
import os
import numpy as np

from renderer import PbrtRenderer
from common import create_folder
from project_path import root_path

if __name__ == '__main__':
    # Create a folder to store the information.
    output_folder = Path('bunny')
    create_folder(output_folder)

    # The asset folder.
    asset_folder = Path(root_path) / 'asset'

    # Create the renderer.
    options = {
        'file_name': str(output_folder / 'demo.png'),
        'light_map': 'uffizi-large.exr',
        'sample': 4,
        'max_depth': 4,
        'camera_pos': (0, -2, 0.8),
        'camera_lookat': (0, 0, 0),
        'camera_up': (0, 0, 1),
    }
    renderer = PbrtRenderer(options)

    # Add the bunny to the scene.
    renderer.add_tri_mesh(asset_folder / 'mesh/bunny.obj', transforms=[
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
import requests
#import time
from dataiku.customrecipe import *
import sys
import re
import logging
import geocoder_utils
import common
import os

logging.info('1/6 Creating base folder...')
path_datadir_tmp = dataiku.get_custom_variables()["dip.home"] + '/tmp/'
P_CENSUS_CONTENT = 'geocoder'
FOLDER_NAME = 'tmp_census_us_' + P_CENSUS_CONTENT
common.create_folder(path_datadir_tmp, FOLDER_NAME, True)

input_name = get_input_names_for_role('input')[0]
output_ = get_output_names_for_role('output')[0]
output_dataset = dataiku.Dataset(output_)

P_COL_STREET = get_recipe_config()['p_col_street']
P_COL_CITY = get_recipe_config()['p_col_city']
P_COL_STATE = get_recipe_config()['p_col_state']
P_COL_ZIPCODE = get_recipe_config()['p_col_zipcode']
P_BENCHMARK = get_recipe_config()['p_benchmark']
P_VINTAGE = get_recipe_config()['p_vintage']

if P_BENCHMARK == "9":
def __init__(self, cfg_ml, audio_dataset, batch_archive, net_handler, sub_model_path=None, encoder_label='', root_path='./'):

    # arguments
    self.cfg_ml = cfg_ml
    self.audio_dataset = audio_dataset
    self.batch_archive = batch_archive
    self.net_handler = net_handler
    self.sub_model_path = sub_model_path
    self.encoder_label = encoder_label
    self.root_path = root_path

    # paths
    self.paths = dict((k, self.root_path + v) for k, v in self.cfg_ml['paths'].items())

    # param path ml
    self.param_path_ml = 'bs-{}_it-{}_lr-{}/'.format(self.cfg_ml['train_params']['batch_size'], self.cfg_ml['train_params']['num_epochs'], str(self.cfg_ml['train_params']['lr']).replace('.', 'p'))

    # adv param path
    self.adv_params_path = 'l{}p{}d{}_itl-{}_itp-{}/'.format(int(self.cfg_ml['adv_params']['label_train']), int(self.cfg_ml['adv_params']['pre_train']), int(self.cfg_ml['adv_params']['use_decoder_weights']), self.cfg_ml['adv_params']['num_epochs_label'], self.cfg_ml['adv_params']['num_epochs_pre'])

    # model path
    self.model_path = self.paths['model'] + self.cfg_ml['nn_arch'] + '/' + self.audio_dataset.param_path + self.param_path_ml

    # create model path first (useful for sub model path changes with ../)
    create_folder([self.model_path])

    # sub dir
    if self.sub_model_path is not None:

        # add adversarial params if this is a conv folder
        if self.sub_model_path.find(cfg_ml['conv_folder']) != -1:
            self.model_path = self.model_path + self.sub_model_path + self.adv_params_path

    # new sub dir for encoder label
    if len(self.encoder_label):
        self.model_path = self.model_path + encoder_label + '/'

    # model path folders
    self.model_path_folders = dict((k, self.model_path + v) for k, v in self.cfg_ml['model_path_folders'].items())

    # model files
    self.model_files = [self.model_path + model_name + '_' + self.cfg_ml['model_file_name'] for model_name, v in net_handler.models.items()]
    self.model_pre_files = [self.paths['model_pre'] + model_name + '_' + '{}_c-{}.pth'.format(self.cfg_ml['nn_arch'], self.batch_archive.n_classes) for model_name, v in net_handler.models.items()]

    # encoder/decoder model files if the architecture provides a conv coder
    enc, dec = net_handler.get_nn_arch_has_conv_coder()
    self.encoder_model_file = self.model_path + self.cfg_ml['encoder_model_file_name'] if enc else None
    self.decoder_model_file = self.model_path + self.cfg_ml['decoder_model_file_name'] if dec else None

    # params and metrics files
    self.params_file = self.model_path + self.cfg_ml['params_file_name']
    self.metrics_file = self.model_path + self.cfg_ml['metrics_file_name']
    self.info_file = self.model_path + self.cfg_ml['info_file_name']
    self.score_file = self.model_path + self.cfg_ml['score_file_name']

    # image list (for adversarial)
    self.img_list = []

    # create ml folders
    create_folder(list(self.paths.values()) + [self.model_path] + list(self.model_path_folders.values()))

    # logging config
    logging.basicConfig(filename=self.paths['log'] + 'ml.log', level=logging.INFO, format='%(asctime)s %(message)s')

    # disable unwanted logs
    logging.getLogger('matplotlib.font_manager').disabled = True
    logging.getLogger('matplotlib.colorbar').disabled = True
    logging.getLogger('matplotlib.animation').disabled = True

    # load pre-trained model
    if self.cfg_ml['load_pre_model']:
        self.net_handler.load_models(model_files=self.model_pre_files)
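For example, with batch_size=32, num_epochs=500 and lr=0.0001, param_path_ml above evaluates to 'bs-32_it-500_lr-0p0001/', so every trained model lands in a folder keyed by its training parameters.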
def main(self):
    db = None
    try:
        # track dataset name for naming generated files and folders
        self.dataset_name = self.namespace.dataset

        # get job id and table history
        job_history_file_name = f'{self.state_folder}/capture.job'
        job_history = JobHistory(job_history_file_name)
        job_history.load()
        job_id = job_history.job_id
        self.job_id = job_id
        logger.info(f'\nCapture job {job_id} for {self.dataset_name} ...')
        self.progress_message(f'starting job {job_id} ...')

        # track job (and table) metrics
        dataset_id = self.namespace.dataset
        self.events = Events(f'{self.work_folder}/job.log', dataset_id=dataset_id, job_id=job_id)
        self.events.start('capture', 'job')

        # track overall job row count and file size
        self.job_row_count = 0
        self.job_data_size = 0

        # create/clear job folders
        create_folder(self.state_folder)
        clear_folder(self.work_folder)
        clear_folder(self.publish_folder)

        # connect to source database
        self.database = self.config(self.project.database_source)
        if self.database.platform == 'postgresql':
            db = database.PostgreSQL(self.database)
            db_engine = database.Database('postgresql', db.conn)
        elif self.database.platform == 'mssql':
            db = database.MSSQL(self.database)
            db_engine = database.Database('mssql', db.conn)
        else:
            raise NotImplementedError(f'Unknown database platform ({self.database.platform})')

        # determine current timestamp for this job's run
        # get current_timestamp() from source database with step back and fast forward logic
        current_timestamp = self.current_timestamp(db_engine)

        # process all tables
        self.events.start('extract', 'step')

        # build dict of table objects indexed by table name
        self.tables = dict()
        for section_name, section_object in self.config.sections.items():
            if section_name.startswith('table:'):
                table_name = section_name.partition(':')[2]
                self.tables[table_name] = section_object

        # extract data from each table
        for table_name, table_object in self.tables.items():
            table_history = job_history.get_table_history(table_name)

            # get current_sequence from source database
            if table_object.cdc == 'sequence':
                current_sequence = db_engine.current_sequence(table_name)
            else:
                current_sequence = 0

            self.process_table(db, db_engine, self.database.schema, table_name, table_object, table_history, current_timestamp, current_sequence)
        self.events.stop('extract', self.job_row_count, self.job_data_size)

        # save interim job metrics to work_folder before compressing this folder
        self.events.stop('capture', self.job_row_count, self.job_data_size)
        self.events.save()

        # compress work_folder files to publish_folder zip file
        self.compress_work_folder()

        # upload publish_folder zip file
        self.upload_to_blobstore()

        # save final metrics for complete job run
        self.events.stop('capture', self.job_row_count, self.job_data_size)
        self.events.save(f'{self.state_folder}/last_job.log')
        self.events.save()

        # update job_id and table histories; only save job history if we're transferring data to landing
        if not self.option('notransfer'):
            job_history.save()

        # compress capture_state and save to capture blobstore for recovery
        self.save_recovery_state_file()

        # update schedule's poll message
        last_job_info = f'last job {self.job_id} on {datetime.datetime.now():%Y-%m-%d %H:%M}'
        schedule_info = f'schedule: {self.schedule}'
        self.schedule.poll_message = f'{script_name()}({self.dataset_name}), {last_job_info}, {schedule_info}'

    # force unhandled exceptions to be exposed
    except Exception:
        logger.exception('Unexpected exception')
        raise

    finally:
        # explicitly close database connection when finished with job
        with contextlib.suppress(Exception):
            db.conn.close()
]
df2 = df[keep_columns_list]

df2_unique = df2.groupby(P_CENSUS_LEVEL_COLUMN).size().reset_index()
df2_unique = df2_unique[[P_CENSUS_LEVEL_COLUMN]]
df2_unique[P_CENSUS_LEVEL_COLUMN] = df2_unique[P_CENSUS_LEVEL_COLUMN].astype('int64')

df2n = df2.groupby(P_COLUMN_STATES).size().reset_index()
df2n.rename(columns={0: 'nb'}, inplace=True)

#----------------------------------------- BASE FOLDER
print('1/6 Creating base folders...')
common.create_folder(path_datadir_tmp, FOLDER_NAME, False)
common.create_folder(path_datadir_tmp + '/' + FOLDER_NAME + '/', P_CENSUS_LEVEL, False)

#----------------------------------------- SOURCE HARVESTER
if P_USE_PREVIOUS_SOURCES is False:
    print('2/6 Collecting US Census Data...')
else:
    print('2/6 Re-using US Census Data if available...')

sources_collector = common.us_census_source_collector(
    P_USE_PREVIOUS_SOURCES, P_CENSUS_TYPE, P_CENSUS_CONTENT, P_CENSUS_LEVEL,
    path_datadir_tmp, FOLDER_NAME, state_list, dict_states)
import yaml
import matplotlib.pyplot as plt
import soundfile

from plots import plot_waveform
from common import create_folder
from path_collector import PathCollector

# Classifier and Mic imports (module locations assumed; they are not named in the original script)
from classifier import Classifier
from mic import Mic

# yaml config file
cfg = yaml.safe_load(open("./config.yaml"))

# init path collector
path_coll = PathCollector(cfg)

# create folder
create_folder([cfg['mic_params']['plot_path']])

# window and hop size
N, hop = int(cfg['feature_params']['N_s'] * cfg['feature_params']['fs']), int(cfg['feature_params']['hop_s'] * cfg['feature_params']['fs'])

# classifier
classifier = Classifier(path_coll=path_coll, verbose=True)

# create mic instance
mic = Mic(classifier=classifier, feature_params=cfg['feature_params'], mic_params=cfg['mic_params'], is_audio_record=True)