def setup():
    parser = argparse.ArgumentParser()
    parser.add_argument('--shuffle', action='store_true')
    parser.add_argument('--dataset-dir', default='dataset', type=str, metavar='DIR')
    parser.add_argument('--nolabel-dir', default='nolabel', type=str, metavar='DIR')
    # parser.add_argument('--label-master', default='label_master.tsv', type=str, metavar='FILE')
    parser.add_argument('--train-master', default='train_master.tsv', type=str, metavar='FILE')
    parser.add_argument('--validate_ratio', default=0.2, type=float,
                        help='fraction of each class held out for validation (default: 0.2)')
    args = parser.parse_args()

    nolabel_dir = os.path.join(args.dataset_dir, args.nolabel_dir)
    train_dir = os.path.join(args.dataset_dir, 'train')
    validate_dir = os.path.join(args.dataset_dir, 'validate')

    # print('==> Getting all labels..')
    # all_labels = get_all_labels(os.path.join(args.dataset_dir, args.label_master))
    # label_div = dict()
    # for label in all_labels:
    #     label_div[label] = list()

    print('==> Getting dataset information..')
    ds_info = get_dataset_info(os.path.join(args.dataset_dir, args.train_master))

    # Group samples by label so the split is stratified per class.
    label_div = dict()
    for file_name, label in ds_info:
        if label not in label_div:
            label_div[label] = list()
        label_div[label].append([file_name, label])

    print('==> Dividing the dataset..')
    train_ds = list()
    validate_ds = list()
    for _, dataset in label_div.items():
        n_ds = len(dataset)
        div_i = n_ds - int(n_ds * args.validate_ratio)
        if args.shuffle:
            random.shuffle(dataset)
        train_ds.extend(dataset[:div_i])
        validate_ds.extend(dataset[div_i:])

    print('==> Moving the dataset..')
    move_files(train_ds, nolabel_dir, train_dir)
    move_files(validate_ds, nolabel_dir, validate_dir)

    os.makedirs('weights', exist_ok=True)
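# The setup() above relies on two helpers that are not included in this collection:
# get_dataset_info(), which reads the train master file, and a three-argument
# move_files(samples, src_dir, dst_dir). The sketch below is only an assumption of what
# they might look like, presuming the master file is tab-separated with a header row
# and that samples are moved by file name; the real helpers may differ.
import csv
import os
import shutil


def get_dataset_info(master_path):
    """Yield (file_name, label) pairs from a TSV master file (assumed format)."""
    with open(master_path, newline='') as f:
        reader = csv.reader(f, delimiter='\t')
        next(reader)  # skip the assumed header row
        for row in reader:
            yield row[0], row[1]


def move_files(samples, src_dir, dst_dir):
    """Move each [file_name, label] entry from src_dir into dst_dir (assumed signature)."""
    os.makedirs(dst_dir, exist_ok=True)
    for file_name, _label in samples:
        shutil.move(os.path.join(src_dir, file_name), os.path.join(dst_dir, file_name))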
def move_to_folders(
    path: Path = Argument(default='.', exists=True, file_okay=True,
                          dir_okay=True, readable=True, resolve_path=True),
    create_folders: bool = Option(default=False),
):
    move_files(path, create_folders)
def place_trials_default(expnum, start_time, end_time, verbose=False):
    """This is the primary way of moving processed data from its proper location
    to the PEN tool's subfolder. As long as the data is organized in our standard
    format, with the metadata located in the MySQL database, this handles all of
    the uploading.

    WARNING: Currently this will not detect that it has been pointed at a folder
    that was already uploaded."""
    destination = experiment_path[expnum]
    current_trial = utils.find_last_trial(expnum) + 1
    existing_evid_dict = caching.load_evid_dictionary(expnum)
    event_data_dicts = smysql.retrieve_event_description(
        start_time, end_time, list_of_sites=mySQL_sitedef[expnum])
    default_folder = smysql.retrieve_data_folder()

    # Look at every event in the database between the time constraints.
    for event in event_data_dicts:
        site_evt_number = event[cfg_evt_siteEvt]
        site_evt_time = event[cfg_evt_time]
        site_event_id = event[cfg_evt_evid]
        site_event_dist = event[cfg_evt_dist]
        site_event_ml = event[cfg_evt_ml]
        file_data_dicts = smysql.retrieve_file_location(site_evt_number,
                                                        mySQL_stadef[expnum])

        # If this event has already been uploaded, report it and skip this event.
        if site_event_id in existing_evid_dict.values():
            nees_logging.log_existing_evid(site_event_id)
            continue

        # Don't do anything if there's no data.
        if file_data_dicts == []:
            continue

        # Generate the file structure on shttp and on the local system.
        description = utils.generate_description(event)
        trialtitle = datetime.datetime.utcfromtimestamp(site_evt_time).strftime(default_time_format)
        trial_doc_folder = "%sTrial-%s/Documentation/" % (destination, current_trial)
        report_source = "%sTrial-%s/Rep-1/%s/" % (destination, current_trial,
                                                  cfg_hub_ext_fold[".txt"])
        report_name = "report.csv"
        readme_name = "readme.pdf"
        events_kml = "event.kml"
        utils.generate_trial_structure(destination, current_trial)
        shttp.post_full_trial(shttp.experiment_id_dic[expnum], trialtitle,
                              description, current_trial)

        # Find and move every file within an event to the created file structure.
        move_datafiles(file_data_dicts, event, destination, current_trial,
                       trial_doc_folder, default_folder, expnum)
        utils.move_files(report_source, trial_doc_folder,
                         [report_name, readme_name, events_kml])
        snupload.upload_reportfile(expnum, current_trial, trial_doc_folder, report_name)
        snupload.upload_reportfile(expnum, current_trial, trial_doc_folder, readme_name)
        snupload.upload_reportfile(expnum, current_trial, trial_doc_folder, events_kml)
        utils.clean_up(report_source)

        # Move on to the next trial for further processing after updating the cache.
        nees_logging.log_goto_nextline(neeshub_log_filename)
        caching.update_all_cache_dictionaries(expnum, current_trial, site_event_id,
                                              site_event_ml, site_event_dist)
        current_trial += 1
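# Here utils.move_files() is called with a source folder, a destination folder, and a
# list of file names; its implementation is not part of this collection. A minimal
# sketch under that assumed signature (the real helper may behave differently, for
# example by logging or raising on missing files):
import os
import shutil


def move_files(source_folder, destination_folder, file_names):
    """Move the named files from source_folder into destination_folder, skipping missing ones."""
    os.makedirs(destination_folder, exist_ok=True)
    for name in file_names:
        src = os.path.join(source_folder, name)
        if os.path.isfile(src):
            shutil.move(src, os.path.join(destination_folder, name))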
def substitute_ad_files_and_upgrade_ad_tactics():
    download_files(
        f_url="https://raw.githubusercontent.com/ToutyRater/V2Ray-SiteDAT/master/geofiles/h2y.dat",
        f_name="h2y.dat")
    if os.path.exists(ad_rules_file):
        os.remove(ad_rules_file)
        move_files("h2y.dat", "/usr/bin/v2ray/")
        writejson.WriteAD("on")
        if re.search(r'/v2ray.fun/maintain.sh', cronfile.read()):
            os.system("bash /usr/local/v2ray.fun/maintain.sh")
            print("Settings applied successfully!")
        else:
            os.system("sed -i '$i 30 4 * * 0 root bash /usr/local/v2ray.fun/maintain.sh' /etc/crontab")
            os.system("bash /usr/local/v2ray.fun/maintain.sh")
            print("Settings applied successfully!")
    else:
        move_files("h2y.dat", "/usr/bin/v2ray/")
        writejson.WriteAD("on")
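# This snippet (and the ad-blocking menu later in this collection) assumes a
# download_files(f_url, f_name) helper and a two-argument move_files(file_name, dest_dir).
# Neither is shown here; the following is only a rough sketch under those assumptions,
# written for Python 3 even though the original project may target Python 2.
import os
import shutil
import urllib.request


def download_files(f_url, f_name):
    """Download f_url and save it locally as f_name (assumed helper)."""
    urllib.request.urlretrieve(f_url, f_name)


def move_files(file_name, dest_dir):
    """Move a single file into dest_dir, replacing any file of the same name."""
    os.makedirs(dest_dir, exist_ok=True)
    shutil.move(file_name, os.path.join(dest_dir, os.path.basename(file_name)))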
def error_cleanup(self, input_schema_name, input_table_name, run_id, path=None,
                  conn_metadata=None, conn_source=None, conn_target=None,
                  target_path=None):
    method_name = self.class_name + ": " + "error_cleanup"
    print_hdr = "[" + method_name + ": " + self.data_path + ": " + str(self.load_id) + "] - "
    print(logdt.now().strftime('[%Y-%m-%d %H:%M:%S] ') + print_hdr + "Entered")

    if path is None:
        path = self.config_list['misc_hdfsStagingPath']
    if self.data_path.find("GP2HDFS") != -1 or self.data_path.find("HDFS2MIR") != -1:
        remove_files(path, input_schema_name, input_table_name)
    if self.data_path.find("KFK2Hive") != -1:
        if path is not None and target_path is not None:
            move_hdfs_files(path, target_path)
    if self.data_path.find("SRC2Hive") != -1:
        if path is not None and target_path is not None:
            move_files(path, (target_path + input_schema_name))
    if input_table_name is not None:
        self.update_control(input_schema_name, input_table_name,
                            self.CONTROL_STATUS_ERROR, run_id)
    if conn_metadata is not None and not conn_metadata.closed:
        conn_metadata.close()
    if conn_source is not None and not conn_source.closed:
        conn_source.close()
    if conn_target is not None:
        conn_target.close()
def _init_gallery(self, download_item):
    """Init gallery.

    Args:
        download_item(:class:`.gallery_downloader_item_obj.GalleryDownloaderItemObject`):
            Downloaded item.
    """
    assert isinstance(download_item, GalleryDownloaderItem)
    # NOTE: try to use ehen's apply_metadata first
    # manager has to edit item.metadata to match this method
    file = download_item.item.file
    app_constants.TEMP_PATH_IGNORE.append(os.path.normcase(file))
    self._download_items[file] = download_item
    self._download_items[utils.move_files(file, only_path=True)] = download_item  # better safe than sorry
    if download_item.item.download_type == app_constants.DOWNLOAD_TYPE_OTHER:
        pass  # do stuff here?
    self.init_fetch_instance.emit([file])
def make_gallery(self, new_gallery, add_to_model=True, new=False):
    if self.check():
        new_gallery.title = self.title_edit.text()
        log_d("Adding gallery title")
        new_gallery.artist = self.author_edit.text()
        log_d("Adding gallery artist")
        log_d("Adding gallery path")
        if new and gui_constants.MOVE_IMPORTED_GALLERIES:
            gui_constants.OVERRIDE_MONITOR = True
            new_gallery.path = utils.move_files(self.path_lbl.text())
        else:
            new_gallery.path = self.path_lbl.text()
        new_gallery.info = self.descr_edit.toPlainText()
        log_d("Adding gallery descr")
        new_gallery.type = self.type_box.currentText()
        log_d("Adding gallery type")
        new_gallery.language = self.lang_box.currentText()
        log_d("Adding gallery lang")
        new_gallery.status = self.status_box.currentText()
        log_d("Adding gallery status")
        new_gallery.tags = utils.tag_to_dict(self.tags_edit.toPlainText())
        log_d("Adding gallery: tagging to dict")
        qpub_d = self.pub_edit.date().toString("ddMMyyyy")
        dpub_d = datetime.strptime(qpub_d, "%d%m%Y").date()
        try:
            d_t = self.gallery_time
        except AttributeError:
            d_t = datetime.now().time().replace(microsecond=0)
        dpub_d = datetime.combine(dpub_d, d_t)
        new_gallery.pub_date = dpub_d
        log_d("Adding gallery pub date")
        new_gallery.link = self.link_lbl.text()
        log_d("Adding gallery link")

        if not new_gallery.chapters:
            def do_chapters(gallery):
                log_d("Starting chapters")
                thread = threading.Thread(target=self.set_chapters,
                                          args=(gallery, add_to_model), daemon=True)
                thread.start()
                thread.join()
                log_d("Finished chapters")
            do_chapters(new_gallery)
        return new_gallery
def make_gallery(self, new_gallery, add_to_model=True, new=False):
    if self.check():
        new_gallery.title = self.title_edit.text()
        log_d('Adding gallery title')
        new_gallery.artist = self.author_edit.text()
        log_d('Adding gallery artist')
        log_d('Adding gallery path')
        if new and app_constants.MOVE_IMPORTED_GALLERIES:
            app_constants.OVERRIDE_MONITOR = True
            new_gallery.path = utils.move_files(self.path_lbl.text())
        else:
            new_gallery.path = self.path_lbl.text()
        new_gallery.info = self.descr_edit.toPlainText()
        log_d('Adding gallery descr')
        new_gallery.type = self.type_box.currentText()
        log_d('Adding gallery type')
        new_gallery.language = self.lang_box.currentText()
        log_d('Adding gallery lang')
        new_gallery.status = self.status_box.currentText()
        log_d('Adding gallery status')
        new_gallery.tags = utils.tag_to_dict(self.tags_edit.toPlainText())
        log_d('Adding gallery: tagging to dict')
        qpub_d = self.pub_edit.date().toString("ddMMyyyy")
        dpub_d = datetime.strptime(qpub_d, "%d%m%Y").date()
        try:
            d_t = self.gallery_time
        except AttributeError:
            d_t = datetime.now().time().replace(microsecond=0)
        dpub_d = datetime.combine(dpub_d, d_t)
        new_gallery.pub_date = dpub_d
        log_d('Adding gallery pub date')
        new_gallery.link = self.link_lbl.text()
        log_d('Adding gallery link')

        if not new_gallery.chapters:
            log_d('Starting chapters')
            thread = threading.Thread(target=self.set_chapters,
                                      args=(new_gallery, add_to_model), daemon=True)
            thread.start()
            thread.join()
            log_d('Finished chapters')
        return new_gallery
def move_files():
    from utils import move_files
    move_files()
    if_open_ad_function = "Ad blocking: not enabled"
else:
    if_open_ad_function = "Ad blocking: enabled"
print("")
print(if_open_ad_function)
print("")
print("1. Enable")
print("2. Disable")
print("3. Update the ad-filtering rules")
choice = raw_input("Please choose: ")
if choice == "1":
    writejson.WriteAD("on")
elif choice == "2":
    writejson.WriteAD("off")
elif choice == "3":
    download_files(
        f_url="https://raw.githubusercontent.com/ToutyRater/V2Ray-SiteDAT/master/geofiles/h2y.dat",
        f_name="h2y.dat")
    if os.path.exists(ad_rules_file):
        os.remove(ad_rules_file)
        move_files("h2y.dat", "/usr/bin/v2ray/")
        writejson.WriteAD("on")
    else:
        move_files("h2y.dat", "/usr/bin/v2ray/")
        writejson.WriteAD("on")
def fs2hdfs_hive_log(self):
    hosts = []
    # Get information about the table to load
    try:
        metadata_sql = "SELECT * FROM sync.control_table \
                        WHERE target_tablename = 'hive_log_ext' \
                        AND target_schemaname = 'default'" + " \
                        AND data_path = " + "'FS2HDFS'"
        print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + "metadata_sql: " + metadata_sql)
        conn_metadata, cur_metadata = dbConnect(self.metastore_dbName, self.dbmeta_User,
                                                self.dbmeta_Url, self.dbmeta_Pwd)
        print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + "before connecting to metastore controls")
        controls = dbQuery(cur_metadata, metadata_sql)
        # print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + "metastore controls:", controls)
    except psycopg2.Error as e:
        error = 2
        err_msg = "Error connecting to control table database".format(error)
        status = 'Job Error'
        output_msg = traceback.format_exc()
        print(output_msg)
        return output_msg
        sys.exit(error)
    finally:
        conn_metadata.close()

    if not controls:
        error = 3
        err_msg = "No Entry found in control table".format(error)
        status = 'Job Error'
        output_msg = "No Entry found in control table"
        return output_msg
        sys.exit(error)

    self.id = str(controls[0]['id'])
    self.source_schema = str(controls[0]['source_schemaname'])
    self.source_tablename = str(controls[0]['source_tablename'])
    self.target_schema = str(controls[0]['target_schemaname'])
    self.target_tablename = str(controls[0]['target_tablename'])
    partitioned = controls[0]['is_partitioned']
    self.load_type = str(controls[0]['load_type'])
    self.s3_backed = controls[0]['s3_backed']
    first_partitioned_column = str(controls[0]['first_partitioned_column'])
    second_partitioned_column = str(controls[0]['second_partitioned_column'])
    partitioned_column_transformation = str(controls[0]['partition_column_transformation'])
    custom_sql = str(controls[0]['custom_sql'])
    self.join_columns = str(controls[0]['join_columns'])
    self.archived_enabled = controls[0]['archived_enabled']
    distribution_columns = str(controls[0]['distribution_columns'])
    dist_col_transformation = str(controls[0]['dist_col_transformation'])
    self.log_mode = str(controls[0]['log_mode'])
    self.last_run_time = str(controls[0]['last_run_time'])

    incoming_path = self.paths + "/hiveserver2.log"
    local_inprogress_path = self.local_staging_path + "/in_progress/"
    inprogress_path = self.staging_path + self.target_schema + "/" + self.target_tablename + "/in_progress/"
    hosts = self.hive_hosts.split(',')
    print(hosts)

    # Creating the local in_progress location and/or clearing that location for new incoming files
    for host in hosts:
        print("Inside Host path check")
        path_to_check = self.local_staging_path + host
        print(path_to_check)
        path_check = glob.glob(path_to_check)
        print(path_check)
        if len(path_check) > 0:
            print("Path exists... Clearing the directory")
            (ret, out, err) = run_cmd(['rm', '-rf', path_to_check])
            print(ret, out, err)
            if ret:
                error = 1
                err_msg = "Error while cleaning in_progress location in Local FS".format(error)
                print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                status = 'Job Error'
                output_msg = traceback.format_exc()
                print(output_msg)
                sys.exit(error)
                return output_msg
        (ret, out, err) = run_cmd(['mkdir', '-p', path_to_check])
        if ret:
            error = 1
            err_msg = "Error while creating in_progress location in Local FS".format(error)
            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
            status = 'Job Error'
            output_msg = traceback.format_exc()
            print(output_msg)
            sys.exit(error)
            return output_msg

    path_check = glob.glob(local_inprogress_path)
    if len(path_check) > 0:
        print("Path exists... Clearing the directory")
        (ret, out, err) = run_cmd(['rm', '-rf', local_inprogress_path])
        print(ret, out, err)
        if ret:
            error = 1
            err_msg = "Error while cleaning in_progress location in Local FS".format(error)
            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
            status = 'Job Error'
            output_msg = traceback.format_exc()
            print(output_msg)
            sys.exit(error)
            return output_msg
    (ret, out, err) = run_cmd(['mkdir', '-p', local_inprogress_path])
    if ret:
        error = 1
        err_msg = "Error while creating in_progress location in Local FS".format(error)
        print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
        status = 'Job Error'
        output_msg = traceback.format_exc()
        print(output_msg)
        sys.exit(error)
        return output_msg

    # Creating the HDFS in_progress location and/or clearing that location for new incoming files
    (ret, out, err) = run_cmd(["hadoop", "fs", "-test", "-e", inprogress_path])
    if ret:
        print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + "Directory does not exist ... Creating...")
        (ret, out, err) = run_cmd(["hadoop", "fs", "-mkdir", "-p", inprogress_path])
        if ret:
            error = 1
            err_msg = "Error while creating in_progress location in HDFS".format(error)
            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
            status = 'Job Error'
            output_msg = traceback.format_exc()
            print(output_msg)
            sys.exit(error)
            return output_msg
    # else:
    #     (ret, out, err) = run_cmd(["hadoop", "fs", "-rm", "-r", inprogress_path + "*"])
    #     if ret:
    #         if err.find("No such file or directory") != -1:
    #             pass
    #         else:
    #             error = 1
    #             err_msg = "Error while cleaning in_progress location in HDFS".format(error)
    #             print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
    #             status = 'Job Error'
    #             output_msg = traceback.format_exc()
    #             print(output_msg)
    #             return output_msg

    # Checking the last run time of the table.
    # Bringing the files from each host since the last run time
    from datetime import date, timedelta
    if self.last_run_time == 'None':
        self.last_run_time = str(datetime.now())
    print("Last Run Time : ", self.last_run_time)
    lr_dt, lr_ts = self.last_run_time.split()
    lr_dt = datetime.strptime(lr_dt, "%Y-%m-%d").date()
    today = datetime.now().date()
    delta = today - lr_dt
    # hosts = self.hive_hosts.split(',')
    print(hosts)
    for host in hosts:
        (ret, out, err) = run_cmd(['scp', ('hdp@' + host + ':' + incoming_path),
                                   (self.local_staging_path + host + "/")])
        print(ret, out, err)
        if ret > 0:
            error = 1
            err_msg = "Error while moving Current Log File to Local in_progress location".format(error)
            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
            status = 'Job Error'
            output_msg = traceback.format_exc()
            print(err_msg, output_msg)
            sys.exit(error)
            return output_msg
        for i in range(delta.days):
            dt = (lr_dt + timedelta(days=i))
            dtstr = dt.isoformat()
            print(dtstr)
            (ret, out, err) = run_cmd(['scp',
                                       ('hdp@' + host + ':' + incoming_path + '.' + dtstr + '*'),
                                       (self.local_staging_path + host + "/")])
            print(ret, out, err)
            if ret > 0:
                if err.find('No such file or directory') != -1:
                    pass
                else:
                    error = 1
                    err_msg = "Error while moving data to in_progress location".format(error)
                    print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                    status = 'Job Error'
                    output_msg = traceback.format_exc()
                    print(output_msg)
                    sys.exit(error)
                    return output_msg

    # Unzipping the files if there are any zipped files
    for host in hosts:
        files = glob.glob((self.local_staging_path + host + "/*"))
        for file in files:
            if file.find(".gz") != -1:
                try:
                    with gzip.open(file, 'rb') as f_in:
                        with open((file.replace('.gz', '_') + host), 'wb') as f_out:
                            shutil.copyfileobj(f_in, f_out)
                except Exception as e:
                    error = 4
                    err_msg = "Error while unzipping file in Local FS"
                    output_msg = traceback.format_exc()
                    print(err_msg, output_msg)
                    sys.exit(error)
                    return output_msg
                # (ret, out, err) = run_cmd(['gunzip', '-c', file, ' > ', 'test'])
                # (ret, out, err) = run_cmd(['gunzip', file])
                # (ret, out, err) = run_cmd(['zcat', file, '>', (file.replace('.gz', '_') + host)])
                # if ret > 0:
                #     error = 1
                #     err_msg = "Error while unzipping file in Local FS".format(error)
                #     print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                #     status = 'Job Error'
                #     output_msg = traceback.format_exc()
                #     print(err_msg, output_msg)
                #     return output_msg
                (ret, out, err) = run_cmd(['rm', '-f', file])
                if ret > 0:
                    error = 1
                    err_msg = "Error while removing zipped file in Local FS".format(error)
                    print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                    status = 'Job Error'
                    output_msg = traceback.format_exc()
                    print(err_msg, output_msg)
                    sys.exit(error)
                    return output_msg
            else:
                (ret, out, err) = run_cmd(['mv', file, (file + '_' + host)])
                if ret > 0:
                    error = 1
                    err_msg = "Error while renaming file in Local FS".format(error)
                    print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                    status = 'Job Error'
                    output_msg = traceback.format_exc()
                    print(err_msg, output_msg)
                    sys.exit(error)
                    return output_msg
        # Moving the final set of files to the in_progress location to send it to HDFS
        move_files((self.local_staging_path + host + "/*"), local_inprogress_path)
        if ret > 0:
            error = 1
            err_msg = "Error while moving files to in_progress location in Local FS".format(error)
            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
            status = 'Job Error'
            output_msg = traceback.format_exc()
            print(err_msg, output_msg)
            sys.exit(error)
            return output_msg

    # Ingesting to HDFS
    (ret, out, err) = run_cmd(['hadoop', 'distcp', '-overwrite',
                               'file:///' + (local_inprogress_path + "/*"),
                               'hdfs:///' + inprogress_path])
    if ret > 0:
        error = 1
        err_msg = "Error while moving files to HDFS from Local in_progress path".format(error)
        print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
        status = 'Job Error'
        output_msg = traceback.format_exc()
        print(err_msg, output_msg)
        sys.exit(error)
        return output_msg

    try:
        metadata_sql = "UPDATE sync.control_table SET last_run_time = now() \
                        WHERE target_tablename = 'hive_log' \
                        AND target_schemaname = 'default'" + " \
                        AND data_path = " + "'FS2HDFS'"
        print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + "metadata_sql: " + metadata_sql)
        conn_metadata, cur_metadata = dbConnect(self.metastore_dbName, self.dbmeta_User,
                                                self.dbmeta_Url, self.dbmeta_Pwd)
        cur_metadata.execute(metadata_sql)
    except psycopg2.Error as e:
        error = 2
        err_msg = "Error connecting to control table database".format(error)
        status = 'Job Error'
        output_msg = traceback.format_exc()
        print(output_msg)
        sys.exit(error)
        return output_msg
    finally:
        conn_metadata.close()
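# The log-shipping job above depends on a run_cmd() helper that returns
# (returncode, stdout, stderr) for a command given as an argument list. That helper is
# not included in this collection; the following is a typical implementation, offered
# only as a sketch (no shell is invoked, so any glob expansion is left to the called
# tool, as with hadoop distcp and the remote side of scp).
import subprocess


def run_cmd(args_list):
    """Run a command (list of arguments) and return (returncode, stdout, stderr) as text."""
    proc = subprocess.Popen(args_list, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = proc.communicate()
    return proc.returncode, out.decode(errors='replace'), err.decode(errors='replace')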
mo = regex.search(new)
assert mo is not None

#
# move_files
#
print('Not modifying files in the current folder')
shutil.rmtree(NEW_IMAGE_FOLDER)
os.mkdir(NEW_IMAGE_FOLDER)
old_names = IMAGES
new_names = utils.construct_new_names(old_names, NEW_IMAGE_FOLDER)
utils.move_files(utils.sort_image_file_paths(old_names), new_names)

first_flaged_file = '2x2'
second_flaged_file = '10x3'
third_flaged_file = '12x03'
for image in IMAGES:
    if first_flaged_file in image.lower():
        first_flaged_file = image
    if second_flaged_file in image.lower():
        second_flaged_file = image
    if third_flaged_file in image.lower():
        third_flaged_file = image
assert os.stat(first_flaged_file).st_size == os.stat(new_names[4]).st_size
assert os.stat(second_flaged_file).st_size == os.stat(new_names[29]).st_size
def main():
    # preprocessing
    # Delete unmatched files and unify the file-name structure
    preprocessing.remove_rename(args)

    # save_path check
    utils.check_directory(args.save_path)

    # read files & sorting
    annotation_files = os.listdir(args.annotation_path)
    images_files = os.listdir(args.image_path)
    annotation_files_sort = sorted(annotation_files)
    images_files_sort = sorted(images_files)
    assert len(annotation_files_sort) == len(images_files_sort), \
        'File counts do not match anno : {0}, images : {1}'.format(
            len(annotation_files), len(images_files))

    # start
    print('Crop start')
    start = time.time()  # record the start time
    crop_image_count = 0

    for i in range(len(images_files)):
        annotation_file = utils.tag_remove_parser(annotation_files_sort[i])
        images_file = utils.tag_remove_parser(images_files_sort[i])

        # .DS_Store: an OS artifact that appears on macOS.
        if (annotation_file != images_file or annotation_file == '.DSStore'
                or images_file == '.DSStore'):
            print('File names do not match for file number {0}'.format(i))
            continue

        # read xml, image files
        tree = parse(os.path.join(args.annotation_path, annotation_file + '.xml'))
        origin_image = Image.open(os.path.join(args.image_path, images_file + '.jpg'))

        # read xml
        root = tree.getroot()

        # Find first tag
        elements = root.findall("object")

        # Get Class name
        names = [x.findtext("name") for x in elements]

        # Get annotation
        xmin_list = []
        ymin_list = []
        xmax_list = []
        ymax_list = []
        for element in elements:
            # xml -> object -> bndbox -> [xmin, ymin, xmax, ymax]
            xmin_list.append(int(element.find('bndbox').find('xmin').text))
            xmax_list.append(int(element.find('bndbox').find('xmax').text))
            ymin_list.append(int(element.find('bndbox').find('ymin').text))
            ymax_list.append(int(element.find('bndbox').find('ymax').text))

        # image crop & save
        for i, name in enumerate(names):
            bndbox_area = (xmin_list[i], ymin_list[i], xmax_list[i], ymax_list[i])
            crop_image = origin_image.crop(bndbox_area)
            crop_image.save(os.path.join(args.save_path,
                                         '{0}_{1}_{2}.jpg'.format(images_file, name, i)))
            # image generate counting
            crop_image_count += 1

    print('Crop end')
    print('Number of generated images :', crop_image_count)
    print("Crop time :", time.time() - start)

    print('File move start')
    start = time.time()  # record the start time
    utils.move_files(args.save_path)
    print("Move time :", time.time() - start)
    print('File move end')
def preprocess(config):
    root = config.root
    ratio = config.ratio

    # Remove key.txt
    if glob.glob(os.path.join(root, '*.txt')):
        os.remove(glob.glob(os.path.join(root, '*.txt'))[0])

    # Write Background parts
    file_list = os.listdir(root)
    png_names = [f[:-11] for f in file_list if 'png' in f]
    not_bg_names = [f for f in file_list if ('jpg' in f) and (f[:-4] in png_names)]
    bg_names = [f for f in file_list if ('jpg' in f) and (f[:-4] not in png_names)]
    assert len(not_bg_names) == len(png_names), \
        'The number of pairs is mismatched'
    #for bn in bg_names:
    #    os.remove(os.path.join(root, bn))

    # Split data by 3 categories: train, val, test
    train_ratio = ratio[0] / sum(ratio)
    val_ratio = ratio[1] / sum(ratio)

    bg_names = sorted(bg_names)
    not_bg_names = sorted(not_bg_names)
    label_names = sorted([f for f in file_list if 'png' in f])

    # Split paired data.
    n = len(not_bg_names)
    idx = list(range(n))
    train_idx = np.random.choice(idx, size=int(n * train_ratio), replace=False)
    idx = [i for i in idx if i not in train_idx]
    val_idx = np.random.choice(idx, size=int(n * val_ratio), replace=False)
    test_idx = [i for i in idx if i not in val_idx]

    train_images = [not_bg_names[i] for i in train_idx]
    train_labels = [label_names[i] for i in train_idx]
    val_images = [not_bg_names[i] for i in val_idx]
    val_labels = [label_names[i] for i in val_idx]
    test_images = [not_bg_names[i] for i in test_idx]
    test_labels = [label_names[i] for i in test_idx]
    print('Train Images: {0}, Train Labels: {1}\t'
          'Validation Images: {2}, Validation Labels: {3}\t'
          'Test Images: {4}, Test Labels: {5}'.format(len(train_images), len(train_labels),
                                                      len(val_images), len(val_labels),
                                                      len(test_images), len(test_labels)))

    # Split background data
    n = len(bg_names)
    idx = list(range(n))
    train_idx = np.random.choice(idx, size=int(n * train_ratio), replace=False)
    idx = [i for i in idx if i not in train_idx]
    val_idx = np.random.choice(idx, size=int(n * val_ratio), replace=False)
    test_idx = [i for i in idx if i not in val_idx]

    train_bg_images = [bg_names[i] for i in train_idx]
    val_bg_images = [bg_names[i] for i in val_idx]
    test_bg_images = [bg_names[i] for i in test_idx]
    print('Train background images: {}\t'
          'Validation background images: {}\t'
          'Test background images: {}'.format(len(train_bg_images), len(val_bg_images),
                                              len(test_bg_images)))

    # Make Folders for saving
    mkdir(os.path.join(root, 'train'))
    mkdir(os.path.join(root, 'train', 'image'))
    mkdir(os.path.join(root, 'train', 'label'))
    mkdir(os.path.join(root, 'val'))
    mkdir(os.path.join(root, 'val', 'image'))
    mkdir(os.path.join(root, 'val', 'label'))
    mkdir(os.path.join(root, 'test'))
    mkdir(os.path.join(root, 'test', 'image'))
    mkdir(os.path.join(root, 'test', 'label'))

    # Add annotation information
    with open(os.path.join(root, 'train', 'annotation.txt'), 'w+') as f:
        for train_image in train_images:
            f.write('{}\t{}\n'.format(train_image, 1))
        for train_bg_image in train_bg_images:
            f.write('{}\t{}\n'.format(train_bg_image, 0))
    with open(os.path.join(root, 'val', 'annotation.txt'), 'w+') as f:
        for val_image in val_images:
            f.write('{}\t{}\n'.format(val_image, 1))
        for val_bg_image in val_bg_images:
            f.write('{}\t{}\n'.format(val_bg_image, 0))
    with open(os.path.join(root, 'test', 'annotation.txt'), 'w+') as f:
        for test_image in test_images:
            f.write('{}\t{}\n'.format(test_image, 1))
        for test_bg_image in test_bg_images:
            f.write('{}\t{}\n'.format(test_bg_image, 0))

    # Move images, labels to directory
    move_files(root, root + '/train/image', train_images)
    move_files(root, root + '/train/image', train_bg_images)
    move_files(root, root + '/train/label', train_labels)
    move_files(root, root + '/val/image', val_images)
    move_files(root, root + '/val/image', val_bg_images)
    move_files(root, root + '/val/label', val_labels)
    move_files(root, root + '/test/image', test_images)
    move_files(root, root + '/test/image', test_bg_images)
    move_files(root, root + '/test/label', test_labels)
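# preprocess() assumes two small helpers that are not shown here: mkdir(path) and a
# three-argument move_files(src_root, dst_dir, file_names). A plausible minimal version
# of both, offered only as a sketch:
import os
import shutil


def mkdir(path):
    """Create a directory if it does not already exist (assumed behaviour)."""
    os.makedirs(path, exist_ok=True)


def move_files(src_root, dst_dir, file_names):
    """Move the listed files from src_root into dst_dir (assumed signature)."""
    for name in file_names:
        shutil.move(os.path.join(src_root, name), os.path.join(dst_dir, name))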
def create_gallery(path, folder_name, do_chapters=True, archive=None):
    is_archive = True if archive else False
    temp_p = archive if is_archive else path
    folder_name = folder_name or path if folder_name or path else os.path.split(archive)[1]
    if utils.check_ignore_list(temp_p) and not GalleryDB.check_exists(temp_p, self.galleries_from_db, False):
        log_i('Creating gallery: {}'.format(folder_name.encode('utf-8', 'ignore')))
        new_gallery = Gallery()
        images_paths = []
        metafile = utils.GMetafile()
        try:
            con = scandir.scandir(temp_p)  # all of the content in the gallery folder
            log_i('Gallery source is a directory')
            chapters = sorted([sub.path for sub in con if sub.is_dir() or sub.name.endswith(utils.ARCHIVE_FILES)])\
                if do_chapters else []  # subfolders
            # if gallery has chapters divided into sub folders
            numb_of_chapters = len(chapters)
            if numb_of_chapters != 0:
                log_i('Gallery has {} chapters'.format(numb_of_chapters))
                for ch in chapters:
                    chap = new_gallery.chapters.create_chapter()
                    chap.title = utils.title_parser(ch)['title']
                    chap.path = os.path.join(path, ch)
                    chap.pages = len(list(scandir.scandir(chap.path)))
                    metafile.update(utils.GMetafile(chap.path))
            else:  # else assume that all images are in the gallery folder
                chap = new_gallery.chapters.create_chapter()
                chap.title = utils.title_parser(os.path.split(path)[1])['title']
                chap.path = path
                metafile.update(utils.GMetafile(chap.path))
                chap.pages = len(list(scandir.scandir(path)))
            parsed = utils.title_parser(folder_name)
        except NotADirectoryError:
            try:
                if is_archive or temp_p.endswith(utils.ARCHIVE_FILES):
                    log_i('Gallery source is an archive')
                    contents = utils.check_archive(temp_p)
                    if contents:
                        new_gallery.is_archive = 1
                        new_gallery.path_in_archive = '' if not is_archive else path
                        if folder_name.endswith('/'):
                            folder_name = folder_name[:-1]
                        fn = os.path.split(folder_name)
                        folder_name = fn[1] or fn[2]
                        folder_name = folder_name.replace('/', '')
                        if folder_name.endswith(utils.ARCHIVE_FILES):
                            n = folder_name
                            for ext in utils.ARCHIVE_FILES:
                                n = n.replace(ext, '')
                            parsed = utils.title_parser(n)
                        else:
                            parsed = utils.title_parser(folder_name)
                        if do_chapters:
                            archive_g = sorted(contents)
                            if not archive_g:
                                log_w('No chapters found for {}'.format(temp_p.encode(errors='ignore')))
                                raise ValueError
                            for g in archive_g:
                                chap = new_gallery.chapters.create_chapter()
                                chap.in_archive = 1
                                chap.title = utils.title_parser(g)['title']
                                chap.path = g
                                metafile.update(utils.GMetafile(g, temp_p))
                                arch = utils.ArchiveFile(temp_p)
                                chap.pages = len(arch.dir_contents(g))
                                arch.close()
                        else:
                            chap = new_gallery.chapters.create_chapter()
                            chap.title = utils.title_parser(os.path.split(path)[1])['title']
                            chap.in_archive = 1
                            chap.path = path
                            metafile.update(utils.GMetafile(path, temp_p))
                            arch = utils.ArchiveFile(temp_p)
                            chap.pages = len(arch.dir_contents(''))
                            arch.close()
                    else:
                        raise ValueError
                else:
                    raise ValueError
            except ValueError:
                log_w('Skipped {} in local search'.format(path.encode(errors='ignore')))
                self.skipped_paths.append((temp_p, 'Empty archive',))
                return
            except app_constants.CreateArchiveFail:
                log_w('Skipped {} in local search'.format(path.encode(errors='ignore')))
                self.skipped_paths.append((temp_p, 'Error creating archive',))
                return
        new_gallery.title = parsed['title']
        new_gallery.path = temp_p
        new_gallery.artist = parsed['artist']
        new_gallery.language = parsed['language']
        new_gallery.info = ""
        metafile.apply_gallery(new_gallery)
        if app_constants.MOVE_IMPORTED_GALLERIES and not app_constants.OVERRIDE_MOVE_IMPORTED_IN_FETCH:
            new_gallery.path = utils.move_files(temp_p)
        self.data.append(new_gallery)
        log_i('Gallery successful created: {}'.format(folder_name.encode('utf-8', 'ignore')))
    else:
        log_i('Gallery already exists: {}'.format(folder_name.encode('utf-8', 'ignore')))
        self.skipped_paths.append((temp_p, 'Already exists'))
#!/usr/bin/env python
"""
From a file containing paths of files, move them to a new folder.
"""
import argparse
import logging

logger = logging.getLogger(__name__)
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)

import utils

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('filepaths', metavar='path_file',
                        help='File containing paths to images')
    parser.add_argument('movefolder', metavar='move_folder',
                        help='Folder where images will be moved')
    parser.add_argument('-c', '--copy', action='store_true', default=False,
                        help='Copy files instead of moving them')
    args = parser.parse_args()
    utils.move_files(args.filepaths, args.movefolder, args.copy)
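# The script above hands utils.move_files() a text file of paths, a destination folder,
# and a copy flag (the last snippet in this collection uses a copy_files keyword for the
# same idea). The helper itself is not included; a minimal sketch under that assumption:
import os
import shutil


def move_files(path_file, move_folder, copy=False):
    """Move (or copy) every file listed in path_file into move_folder (assumed behaviour)."""
    os.makedirs(move_folder, exist_ok=True)
    transfer = shutil.copy2 if copy else shutil.move
    with open(path_file) as f:
        for line in f:
            src = line.strip()
            if src:
                transfer(src, os.path.join(move_folder, os.path.basename(src)))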
def create_gallery(path, folder_name, do_chapters=True, archive=None):
    is_archive = True if archive else False
    temp_p = archive if is_archive else path
    folder_name = folder_name or path if folder_name or path else os.path.split(archive)[1]
    if utils.check_ignore_list(temp_p) and not GalleryDB.check_exists(temp_p, self.galleries_from_db, False):
        log_i('Creating gallery: {}'.format(folder_name.encode('utf-8', 'ignore')))
        new_gallery = Gallery()
        images_paths = []
        try:
            con = scandir.scandir(temp_p)  # all of the content in the gallery folder
            log_i('Gallery source is a directory')
            chapters = sorted([sub.path for sub in con if sub.is_dir() or sub.name.endswith(utils.ARCHIVE_FILES)])\
                if do_chapters else []  # subfolders
            # if gallery has chapters divided into sub folders
            if len(chapters) != 0:
                log_i('Gallery has chapters divided in directories')
                for numb, ch in enumerate(chapters):
                    chap_path = os.path.join(path, ch)
                    new_gallery.chapters[numb] = chap_path
            else:  # else assume that all images are in the gallery folder
                new_gallery.chapters[0] = path

            ## find last edited file
            #times = set()
            #for root, dirs, files in os.walk(path, topdown=False):
            #    for img in files:
            #        fp = os.path.join(root, img)
            #        times.add(os.path.getmtime(fp))
            #last_updated = time.asctime(time.gmtime(max(times)))
            #new_gallery.last_update = last_updated
            parsed = utils.title_parser(folder_name)
        except NotADirectoryError:
            try:
                if is_archive or temp_p.endswith(utils.ARCHIVE_FILES):
                    log_i('Gallery source is an archive')
                    contents = utils.check_archive(temp_p)
                    if contents:
                        new_gallery.is_archive = 1
                        new_gallery.path_in_archive = '' if not is_archive else path
                        if folder_name.endswith('/'):
                            folder_name = folder_name[:-1]
                        fn = os.path.split(folder_name)
                        folder_name = fn[1] or fn[2]
                        folder_name = folder_name.replace('/', '')
                        if folder_name.endswith(utils.ARCHIVE_FILES):
                            n = folder_name
                            for ext in utils.ARCHIVE_FILES:
                                n = n.replace(ext, '')
                            parsed = utils.title_parser(n)
                        else:
                            parsed = utils.title_parser(folder_name)
                        if do_chapters:
                            archive_g = sorted(contents)
                            if not archive_g:
                                log_w('No chapters found for {}'.format(temp_p.encode(errors='ignore')))
                                raise ValueError
                            for n, g in enumerate(archive_g):
                                new_gallery.chapters[n] = g
                        else:
                            new_gallery.chapters[0] = path
                    else:
                        raise ValueError
                else:
                    raise ValueError
            except ValueError:
                log_w('Skipped {} in local search'.format(path.encode(errors='ignore')))
                self.skipped_paths.append(temp_p)
                return
        new_gallery.title = parsed['title']
        new_gallery.path = temp_p
        new_gallery.artist = parsed['artist']
        new_gallery.language = parsed['language']
        new_gallery.info = "No description.."
        if gui_constants.MOVE_IMPORTED_GALLERIES and not gui_constants.OVERRIDE_MOVE_IMPORTED_IN_FETCH:
            new_gallery.path = utils.move_files(temp_p)
        self.data.append(new_gallery)
        log_i('Gallery successful created: {}'.format(folder_name.encode('utf-8', 'ignore')))
    else:
        log_i('Gallery already exists: {}'.format(folder_name.encode('utf-8', 'ignore')))
        self.skipped_paths.append(temp_p)
parser.add_argument('-o', '--output', default='./error',
                    help='Path to a folder where images with errors are saved.')
parser.add_argument('-m', '--move', action='store_true',
                    help='Move files instead of copying them when the output is a folder.')
args = parser.parse_args()

input = args.finput
if isdir(input):
    input = input + '/'
    dirin = dirname(realpath(input))
    inputfile = join(dirin, 'paths.txt')
    create_paths(input, inputfile)
    input = inputfile

output = args.output
if not isdir(output):
    output = join(dirname(input), output)
    os.mkdir(output)

outputfile = join(dirname(input), 'error.txt')
verify_errors(input, outputfile)

if args.move:
    move_files(outputfile, output, copy_files=False)
else:
    move_files(outputfile, output, copy_files=True)