def prepare_data(posfilpath, negfilepath, size, word2index):
    posfiles = common.get_files(posfilpath)
    negfiles = common.get_files(negfilepath)
    sz = min(2 * len(posfiles), 2 * len(negfiles), size)
    x_train = list()
    y_train = np.zeros((size, 1))
    for i in range(int(size / 2)):
        posline = common.get_content(posfilpath + posfiles[i])
        k = 2 * i
        if posline != "":
            sent_pos = posline.lower().split(" ")
            sent_ls = list()
            for word in sent_pos:
                if word in word2index:
                    sent_ls.append(word2index[word])
            y_train[k][0] = 1
            x_train.append(list(set(sent_ls)))
        negline = common.get_content(negfilepath + negfiles[i])
        if negline != "":
            sent_neg = negline.lower().split(" ")
            sent_ls = list()
            for word in sent_neg:
                if word in word2index:
                    sent_ls.append(word2index[word])
            x_train.append(list(set(sent_ls)))
    return (x_train, y_train, sz)
def prepare_data(posfilpath, negfilepath, size):
    posfiles = common.get_files(posfilpath)
    negfiles = common.get_files(negfilepath)
    sz = min(2 * len(posfiles), 2 * len(negfiles), size)
    x_train = list()
    x_concat = list()
    for i in range(int(size / 2)):
        posline = common.get_content(posfilpath + posfiles[i])
        if posline != "":
            sent_pos = posline.lower().split(" ")
            sent_ls = list()
            for word in sent_pos:
                if word in word2index:
                    word_i = word2index[word]
                    sent_ls.append(word_i)
                    x_concat.append(word_i)
            x_train.append(sent_ls)
        negline = common.get_content(negfilepath + negfiles[i])
        if negline != "":
            sent_neg = negline.lower().split(" ")
            sent_ls = list()
            for word in sent_neg:
                if word in word2index:
                    word_i = word2index[word]
                    sent_ls.append(word_i)
                    x_concat.append(word_i)
            x_train.append(sent_ls)
    return (x_train, x_concat, sz)
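# Hypothetical sketch of the two `common` helpers the prepare_data variants
# above rely on; the real module is not shown, and other snippets in this
# collection call get_files with different signatures, so treat this only as
# an illustration of the behaviour assumed here (bare file names, raw text).
import os

def get_files(dirpath):
    # List plain files (names only, no directory prefix), sorted for a
    # reproducible order -- an assumption, not the project's actual code.
    return sorted(name for name in os.listdir(dirpath)
                  if os.path.isfile(os.path.join(dirpath, name)))

def get_content(filepath):
    # Return the file's text, or "" when it cannot be read.
    try:
        with open(filepath, encoding="utf-8", errors="ignore") as fh:
            return fh.read().strip()
    except OSError:
        return ""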
def __init__(self, image_dir_1, image_dir_2, output_dir, index_file):
    self.image_dir_1 = image_dir_1
    self.img_files_1 = common.get_files(image_dir_1)
    self.image_dir_2 = image_dir_2
    self.img_files_2 = common.get_files(image_dir_2)
    print(len(self.img_files_1), len(self.img_files_2))
    self.output_dir = output_dir
    self.index_file = index_file
    return
def file_scanner(numbers, to_servers):
    while True:
        print("process1 file_scanner", "-" * 20)
        print(to_servers)
        share_folder = os.getcwd() + "/share/"
        file_dic = get_files(share_folder)
        print(file_dic)
        for server in to_servers:
            # we will reconnect 5 times
            for i in range(5):
                try:
                    sleeptime = random.uniform(0.5, 1)
                    print('scan folder after:', round(sleeptime, 3), ' seconds')
                    time.sleep(sleeptime)
                    # share_folder = os.getcwd() + "/share/"
                    # filename = "t2.txt"
                    # localfilename = share_folder + filename
                    # print(localfilename)
                    for localfilename, value in file_dic.items():
                        time.sleep(3)
                        print("@" * 10, "send localfile:", localfilename, " => remote server")
                        sender_file(server, localfilename, value)
                    break
                except Exception as ex:
                    print("Unexpected error in file_scanner:", sys.exc_info()[0], "ex=", ex)
            else:
                print("try 5 times file_scanner")
def main(imagedir, sim=0.5):
    """Example main app using this library.

    Parameters
    ----------
    imagedir : str
        path to directory with images
    sim : float (0..1)
        similarity index (see imagecluster.cluster())
    """
    dbfn = pj(imagedir, ic_base_dir, 'fingerprints.pk')
    # print("dbfn= " + dbfn)
    if not os.path.exists(dbfn):
        os.makedirs(os.path.dirname(dbfn), exist_ok=True)
        print("no fingerprints database {} found".format(dbfn))
        files = co.get_files(imagedir)
        model = ic.get_model()
        print("running all images through NN model ...")
        fps = ic.fingerprints(files, model, size=(224, 224))
        # print(fps)
        co.write_pk(fps, dbfn)
    else:
        print("loading fingerprints database {} ...".format(dbfn))
        fps = co.read_pk(dbfn)
    print("clustering ...")
    print(len(fps))
    clusters = ic.cluster(fps, sim)
    help(imagedir, clusters)  # presumably a project-local helper, not the builtin help()
    ic.make_links(clusters, pj(imagedir, ic_base_dir, 'clusters'))
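# Hypothetical command-line wrapper for the main(imagedir, sim) function
# above; the argument handling is an assumption added for illustration.
if __name__ == '__main__':
    import sys
    imagedir = sys.argv[1]
    sim = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5
    main(imagedir, sim=sim)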
def revert_files(src_dir):
    images = common.get_files(src_dir)
    for path, filename in images:
        os.rename(path, common.SAMPLES_DIR + filename)
    print("Reverted {} images.\n".format(len(images)))
def main(_):
    model_graph = tf.Graph()
    with model_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(FLAGS.frozen_graph_path, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
    with model_graph.as_default():
        with tf.Session(graph=model_graph) as sess:
            inputs = model_graph.get_tensor_by_name('image_tensor:0')
            classes = model_graph.get_tensor_by_name('classes:0')
            correct = 0
            files = common.get_files('./test')
            all_num = len(files)
            for f in files:
                image = Image.open(f)
                image = image.resize([INPUT_HEIGHT, INPUT_WIDTH])
                image = np.array(image, dtype=np.uint8)
                image_np = np.expand_dims(image, axis=0)
                predicted_label = sess.run(classes, feed_dict={inputs: image_np})
                if f.lower().count('true') > 0:
                    gt = 1
                else:
                    gt = 0
                if predicted_label[0] == gt:
                    correct += 1
                print('predict label {} vs gt {} '.format(predicted_label[0], gt))
            print('total {} correct {} wrong {} rate {}'.format(
                all_num, correct, all_num - correct, float(correct) / all_num))
def process_dataset(imagedir, modelname='ResNet50', input_size=224):
    """
    processes a list of files (filenames)
    1 - calculates sha256 hash and renames files to hash
    2 - crops out image from meme and copies into ./cropped/
    3 - calculates phash using the imagehash library
    4 - calculates dnn fingerprint using keras and tensorflow
    5 - does the same for cropped versions
    6 - applies a clustering algorithm on fingerprints of cropped images
    7 - plots all members of all clusters into a jpg file and saves results
    - returns a pandas dataframe with the information
    """
    files = co.get_files(imagedir)
    print("> Renaming {} files (to sha256 hash)".format(len(files)))
    files, hashes = co.rename_files(files, imagedir)
    print("done.")

    # create pandas dataframe containing all data
    df = pd.DataFrame(index=hashes)
    df['filename'] = files
    df['hash'] = hashes

    print("> Phashing {} files".format(len(files)))
    phashes = ph.return_phashes(files)
    df['phash'] = phashes
    print("done.")

    print("> Cropping and copying all images")
    df = co.crop_images(df, imagedir, input_size)
    print("done.")

    print("> Loading Keras model {}".format(modelname))
    model, getFingerprint = ph.get_model(modelname=modelname)
    # construct fingerprint model (second to last layer)
    # getFingerprint = K.function([model.layers[0].input],
    #                             [model.layers[-2].output])
    print("done.")

    print("> Running images through DNN {}".format(modelname))
    # get fingerprints
    fps, preds, labels = ph.fingerprints(files, model, getFingerprint,
                                         size=(input_size, input_size),
                                         modelname=modelname)
    df['fingerprints'] = fps
    df['labels'] = labels

    print("> Running CROPPED images through DNN {}".format(modelname))
    # get fingerprints
    # NOTE: this call still passes the original `files` list; the cropped
    # file paths produced by crop_images() are presumably intended here.
    cfps, cpreds, clabels = ph.fingerprints(files, model, getFingerprint,
                                            size=(input_size, input_size),
                                            modelname=modelname)
    df['cropped_fingerprints'] = cfps
    df['cropped_labels'] = clabels
    print("done.")
    return df
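# Hypothetical usage of process_dataset(); the directory name is an
# assumption, and the printed columns simply echo fields built above.
df = process_dataset('./memes', modelname='ResNet50', input_size=224)
print(df[['filename', 'phash', 'labels']].head())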
def main(training_set_ratio):
    arrows = pd.DataFrame(np.zeros((3, 4), dtype=np.int32),
                          index=('hollow', 'full', 'thin'),
                          columns=('down', 'left', 'right', 'up'))
    images = common.get_files(common.SAMPLES_DIR)
    if images:
        for _, filename in images:
            arrow_direction, arrow_type = common.arrow_labels(filename)
            arrows[arrow_direction][arrow_type] += 1
        num_samples = int(arrows.min().min() * training_set_ratio)
        print("Samples per type: {}".format(num_samples * 4))
        for t, _ in arrows.iterrows():
            print("\nProcessing {} arrows...".format(t))
            for d in arrows:
                candidates = [(p, f) for p, f in images
                              if common.arrow_labels(f) == (d, t)]
                print("{}: {}".format(d, len(candidates)))
                training = random.sample(candidates, num_samples)
                for path, filename in training:
                    dst_dir = common.TRAINING_DIR + d + "/"
                    if not os.path.exists(dst_dir):
                        os.makedirs(dst_dir)
                    os.rename(path, dst_dir + filename)
                candidates = [c for c in candidates if c not in training]
                validation = random.sample(
                    candidates, int(len(candidates) * VALIDATION_SET_RATIO))
                for path, filename in validation:
                    dst_dir = common.VALIDATION_DIR + d + "/"
                    if not os.path.exists(dst_dir):
                        os.makedirs(dst_dir)
                    os.rename(path, dst_dir + filename)
                testing = [c for c in candidates if c not in validation]
                for path, filename in testing:
                    dst_dir = common.TESTING_DIR + d + "/"
                    if not os.path.exists(dst_dir):
                        os.makedirs(dst_dir)
                    os.rename(path, dst_dir + filename)
        show_summary()
        print("\nFinished!")
def __init__(self, root_dir, image_dir, label_file, index_file, copy_dir, copy_file):
    self.img_files = common.get_files(os.path.join(root_dir, image_dir))
    self.image_dir = image_dir
    self.label_file = label_file
    self.car_points = []
    self.index_file = index_file
    self.copy_dir = copy_dir
    self.copy_file = copy_file
    print("[len] ", len(self.img_files))
    return
def main():
    common.create_directories()
    print(" Q = ignore image")
    print(" 1 = label as round")
    print(" 2 = label as wide")
    print(" 3 = label as narrow")
    print("ARROW KEYS = label directions\n")
    global type_label
    global direction_label
    global plt_text
    unlabeled_imgs = common.get_files(common.SCREENSHOTS_DIR)
    num_labeled = 0
    for path, filename in unlabeled_imgs:
        print("Processing {}...".format(filename))
        img = plt.imread(path)
        ax = plt.gca()
        fig = plt.gcf()
        plot = ax.imshow(img)
        plt.axis('off')
        plt.tight_layout()
        plt_text = plt.text(0, 0, "")
        fig.canvas.mpl_connect('key_press_event', on_press)
        mng = plt.get_current_fig_manager()
        mng.window.state('zoomed')
        plt.show()
        if type_label and direction_label:
            dst_filename = "{}_{}_{}.png".format(
                type_dictionary[type_label], direction_label,
                time.strftime("%Y%m%d-%H%M%S"))
            os.rename(path, common.LABELED_DIR + dst_filename)
            direction_label = ''
            type_label = None
            num_labeled += 1
    if len(unlabeled_imgs) > 0:
        print("\nLabeled {} out of {} images ({}%).".format(
            num_labeled, len(unlabeled_imgs),
            100 * num_labeled // len(unlabeled_imgs)))
        print("Finished!")
    else:
        print("\nThere are no images to label.")
def get_fp(imagedir, ic_base_dir='imagecluster'):
    dbfn = pj(ic_base_dir, 'fingerprints.pk')
    if os.path.exists(os.path.dirname(dbfn)):
        shutil.rmtree(os.path.dirname(dbfn))
    os.makedirs(os.path.dirname(dbfn))  # , exist_ok=True
    print("no fingerprints database {} found".format(dbfn))
    files = co.get_files(imagedir)
    model = ic.get_model()
    print("running all images through NN model ...")
    fps = ic.fingerprints(files, model, size=(224, 224))
    co.write_pk(fps, dbfn)
def get_pipeline_files(raw_dir, extracted_dir, parsed_dir, standardized_dir):
    """
    yields file names corresponding to the raw, extracted, parsed, standardized
    intermediate steps of the pipeline
    """
    suffix = ".json"
    for raw_file in [f for f in get_files(raw_dir) if identify_file(f)]:
        filestem = get_filename_without_extension(raw_file)
        extracted_file = os.path.join(extracted_dir, filestem + suffix)
        parsed_file = os.path.join(parsed_dir, filestem + suffix)
        standardized_file = os.path.join(standardized_dir, filestem + suffix)
        yield raw_file, extracted_file, parsed_file, standardized_file
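# Hypothetical illustration of consuming get_pipeline_files(); the directory
# layout mirrors the run() pipeline further below but is an assumption here.
for raw, extracted, parsed, standardized in get_pipeline_files(
        "data/raw", "data/extracted", "data/parsed", "data/standardized"):
    print(raw, "->", standardized)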
def __init__(self, image_dir):
    self.img = None
    self.img_files = common.get_files(image_dir)
    print('total imgs len: ', len(self.img_files))
    self.image_dir = image_dir
    self.plate_encode = "utf8"
    self.label_normal_file = os.path.join('.', 'label_normal.txt')
    self.label_test_file = os.path.join('.', 'label_test.txt')
    self.label_error_file = os.path.join('.', 'label_error.txt')
    self.index_file = os.path.join('.', 'index.txt')
    return
def main(training_set_ratio):
    common.create_directories()
    arrows = pd.DataFrame(np.zeros((3, 4), dtype=np.int32),
                          index=('round', 'wide', 'narrow'),
                          columns=('down', 'left', 'right', 'up'))
    images = [(p, f) for p, f in common.get_files(common.SAMPLES_DIR)
              if f[-5] != 'F']
    if images:
        for _, filename in images:
            arrow_direction, arrow_type = common.arrow_labels(filename)
            arrows[arrow_direction][arrow_type] += 1
        num_samples = int(arrows.min().min() * training_set_ratio)
        print("Samples per type: {}".format(num_samples * 4))
        for t, _ in arrows.iterrows():
            print("\nProcessing {} arrows...".format(t))
            for direction in arrows:
                candidates = [(p, f) for p, f in images
                              if common.arrow_labels(f) == (direction, t)]
                print("{}: {}".format(direction, len(candidates)))
                training = random.sample(candidates, num_samples)
                for path, filename in training:
                    dst_dir = common.TRAINING_DIR + direction + '/'
                    os.rename(path, dst_dir + filename)
                    os.rename(flipped(path), dst_dir + flipped(filename))
                candidates = [c for c in candidates if c not in training]
                validation = random.sample(
                    candidates, int(len(candidates) * VALIDATION_SET_RATIO))
                for path, filename in validation:
                    dst_dir = common.VALIDATION_DIR + direction + '/'
                    os.rename(path, dst_dir + filename)
                    os.rename(flipped(path), dst_dir + flipped(filename))
                testing = [c for c in candidates if c not in validation]
                for path, filename in testing:
                    dst_dir = common.TESTING_DIR + direction + '/'
                    os.rename(path, dst_dir + filename)
                    os.rename(flipped(path), dst_dir + flipped(filename))
        show_summary()
        print("\nFinished!")
def prepare_data(posfilpath, negfilepath, size):
    posfiles = common.get_files(posfilpath)
    negfiles = common.get_files(negfilepath)
    reviews = list()
    raw_sent = list()
    for i in range(int(size / 2)):
        posline = common.get_content(posfilpath + posfiles[i])
        if posline != "":
            posline = posline.lower()
            sent = posline.split(" ")
            reviews.append(makesentvec(sent))
            raw_sent.append(posline)
        negline = common.get_content(negfilepath + negfiles[i])
        if negline != "":
            negline = negline.lower()
            sent = negline.split(" ")
            reviews.append(makesentvec(sent))
            raw_sent.append(negline)
    return (reviews, raw_sent)
def main():
    """User interface."""
    parser = argparse.ArgumentParser(
        description='Helper script to split MIDI files into '
                    'shorter sequences by a fixed duration.')
    parser.add_argument('files',
                        metavar='path',
                        nargs='+',
                        help='path of input files (.mid). '
                             'accepts * as wildcard')
    parser.add_argument('--target_folder',
                        metavar='path',
                        help='folder path where '
                             'generated results are stored',
                        default=common.DEFAULT_TARGET_FOLDER)
    parser.add_argument('--duration',
                        metavar='seconds',
                        type=int,
                        help='duration of every slice in seconds',
                        choices=range(1, 60 * 60),
                        default=DEFAULT_DURATION)

    args = parser.parse_args()

    file_paths = common.get_files(args.files)
    target_folder_path = args.target_folder
    duration = args.duration

    common.check_target_folder(target_folder_path)

    for file_path in file_paths:
        if common.is_invalid_file(file_path):
            continue

        # Read MIDI file and clean up
        score = midi.PrettyMIDI(file_path)
        score.remove_invalid_notes()
        print('➜ Loaded "{}".'.format(file_path))

        # Split MIDI file!
        splits = split_score(score, duration)

        # Generate MIDI files from splits
        generate_files(file_path, target_folder_path, splits)
        print('')

    print('Done!')
def append_from_folder(self, folder):
    # recursively appends all the XML game files
    # which are found inside a folder
    xml_files = common.get_files(folder, ["xml"])
    for xml_file in xml_files:
        try:
            games = common.Game_set(self.globalvars, xml_file)
            self.games_list.append(games)
        except Exception, e:
            common.error("Could not cope with " + xml_file)
            common.error("Problem in XML file reading ", e,
                         traceback.format_exc())
def main(imagedir, processingDir, similarity=.4):
    imageFeaturePath = pathJoin(processingDir, 'imagefeatures.pk')
    if not os.path.exists(imageFeaturePath):
        common.makeDir(imageFeaturePath)
        print("No imagefeatures database {} found".format(imageFeaturePath))
        files = common.get_files(imagedir)
        model = imagecluster.get_model()
        fps = imagecluster.fingerprints(files, model, size=(224, 224))
        common.write_pk(fps, imageFeaturePath)
    else:
        print("loading fingerprints database {} ...".format(imageFeaturePath))
        fps = common.read_pk(imageFeaturePath)
    print("clustering ...")
    imagecluster.make_links(imagecluster.cluster(fps, similarity),
                            pathJoin(imagedir, processingDir, 'clusters'))
def save_cache(self):
    now = datetime.datetime.now()
    old_keys = []
    for key in self.data.keys():
        if (now - key).days >= 7:
            old_keys.append(key)
    for key in old_keys:
        del self.data[key]
    try:
        name, tmpname = get_files(self.cp, "transfer_data")
        fp = open(tmpname, 'w')
        pickle.dump(self.data, fp)
        fp.close()
        commit_files(name, tmpname)
        log.debug("Saved data to cache.")
    except Exception, e:
        log.warning("Unable to write cache; message: %s" % str(e))
def show_summary():
    matrix = pd.DataFrame(np.zeros((4, 5), dtype=np.int32),
                          index=('hollow', 'full', 'thin', 'total'),
                          columns=('down', 'left', 'right', 'up', 'total'))
    images = common.get_files(common.SAMPLES_DIR)
    for _, filename in images:
        arrow_direction, arrow_type = common.arrow_labels(filename)
        matrix[arrow_direction][arrow_type] += 1
        matrix['total'][arrow_type] += 1
        matrix[arrow_direction]['total'] += 1
        matrix['total']['total'] += 1
    print(cf.salmon("Samples summary"))
    print(matrix, "\n")
def indent(directory, file_ext, spaces=4, padding=12, left_justify=False,
           recursive=False, overwrite=False, verbose=False):
    """
    Method to perform the indentation process.
    """
    pv.path(directory, "input", False, True)
    pv.string(file_ext, "file extension", False, None)
    pv.intvalue(spaces, "spaces", True, False, False)
    pv.intvalue(padding, "padding", True, False, False)

    directory = os.path.abspath(directory)
    spaces = int(spaces)
    padding = int(padding)
    num = 1

    if verbose:
        print "\nGathering files to process. Please wait.\n"

    list_files = common.get_files(directory, file_ext, recursive)
    if len(list_files) == 0:
        if verbose:
            print "No files to process.\n"
        return

    just = len(str(len(list_files)))
    for file_input in list_files:
        if verbose:
            print "Processing file %s of %s: '%s'" % \
                  (str(num).rjust(just, " "), str(len(list_files)), file_input)
        num += 1
        if overwrite:
            __indent_file(file_input, spaces, padding, left_justify)
        else:
            __indent_copy(file_input, spaces, padding, left_justify)

    if verbose:
        print "\nFinished.\n"
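# Hypothetical call to indent(); the directory and extension are assumptions.
# With overwrite=False the (unshown) __indent_copy helper presumably writes
# indented copies instead of modifying the files in place.
indent("./src", "py", spaces=4, padding=12, recursive=True, verbose=True)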
def generate_record(images_path, output_path):
    writer = tf.python_io.TFRecordWriter(output_path)
    files = common.get_files(images_path, '*.jpg')
    for f in files:
        print(os.path.basename(f))
        if f.lower().count('true') > 0:
            label = 1
        else:
            label = 0
        shape, binary_image = get_image_binary(f)
        example = tf.train.Example(features=tf.train.Features(feature={
            'image/label': int64_feature(label),
            'image/height': int64_feature(shape[0]),
            'image/width': int64_feature(shape[1]),
            'image/channel': int64_feature(shape[2]),
            'image/encoded': bytes_feature(binary_image)
        }))
        writer.write(example.SerializeToString())
    writer.close()
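# Plausible definitions of the int64_feature / bytes_feature helpers used in
# generate_record(); they follow the usual TensorFlow 1.x Example-building
# idiom, but the project's own helper module is not shown, so this is an
# assumption.
def int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))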
def run(self, sect, mode="hourly"):
    self.mode = mode
    if self.mode == "monthly":
        self.num_points = 12
    elif self.mode == "daily":
        self.num_points = 31
    self.parse_data()
    for format in self.format:
        name, tmpname = get_files(self.cp, sect, format=format)
        self.build_canvas(format=format)
        self.draw()
        fd = open(tmpname, 'w')
        self.file = fd
        self.write_graph(format=format)
        fd.flush()
        os.fsync(fd)
        commit_files(name, tmpname)
def generate_record(images_path, output_path):
    writer = tf.python_io.TFRecordWriter(output_path)
    files = common.get_files(images_path, '*.jpeg')
    labels = common.get_sub_directory_name(images_path)
    for f in files:
        label = common.get_parent_dir(f)
        # string label to int
        label = labels.index(label)
        print('label {} image {} '.format(label, f))
        shape, binary_image = get_image_binary(f)
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/label': int64_feature(label),
                'image/height': int64_feature(shape[0]),
                'image/width': int64_feature(shape[1]),
                'image/channel': int64_feature(shape[2]),
                'image/encoded': bytes_feature(binary_image)
            }))
        writer.write(example.SerializeToString())
    writer.close()
def run(data_dir):
    """
    Run the pipeline, intermediate files go into data/extracted, data/parsed,
    and data/standardized which is ingested into ./expenses.db
    """
    cores = mp.cpu_count()
    pool = mp.Pool(cores)
    jobs = []

    raw_dir = os.path.join(data_dir, "raw")
    extracted_dir = os.path.join(data_dir, "extracted")
    parsed_dir = os.path.join(data_dir, "parsed")
    standardized_dir = os.path.join(data_dir, "standardized")

    if len(os.listdir(raw_dir)) == 0:
        return False

    make_dirs([extracted_dir, parsed_dir, standardized_dir])

    with tempfile.TemporaryDirectory() as tmp_standardized_dir:
        for raw, extracted, parsed, standardized in get_pipeline_files(
                raw_dir, extracted_dir, parsed_dir, tmp_standardized_dir):
            jobs.append(
                pool.apply_async(_etl, (raw, extracted, parsed, standardized)))
        [job.get() for job in jobs]

        # TODO: hardcoded expenses tablename and expenses.db
        ingest(
            get_files(tmp_standardized_dir),
            "expenses",
            os.path.join(data_dir, "expenses.db"),
        )

        for file_ in os.listdir(tmp_standardized_dir):
            os.replace(
                os.path.join(tmp_standardized_dir, file_),
                os.path.join(standardized_dir, file_),
            )

    return True
def replace(directory, file_ext, mode, spaces=8, recursive=False,
            overwrite=False, verbose=True):
    """
    Method to perform the replacement process.
    """
    pv.path(directory, "input", False, True)
    pv.string(file_ext, "file extension", False, None)
    mode = mode.lower()
    pv.compstr(mode, "mode", ["spaces", "tabs"])
    pv.intvalue(spaces, "spaces", True, False, False)

    directory = os.path.abspath(directory)
    spaces = int(spaces)
    num = 1

    if verbose:
        print "\nGathering files to process. Please wait.\n"

    list_files = common.get_files(directory, file_ext, recursive)
    just = len(str(len(list_files)))

    for file_input in list_files:
        if verbose:
            print "Processing file %s of %s: '%s'" % \
                  (str(num).rjust(just, " "), str(len(list_files)), file_input)
        num += 1
        if overwrite:
            __replace_file(file_input, mode, spaces)
        else:
            __replace_copy(file_input, mode, spaces)

    if verbose:
        print "\nFinished.\n"
total_multimedia_files = 0.0
total_multimedia_files_with_lic_info = 0.0
mediafiles = MediaFiles()
activities = Activities()
for data_folder in constants.data_folders:
    os.chdir(os.path.join(original_path, data_folder))
    for media_type in media_types:
        data_files = common.get_files(".", extensions[media_type])
        for data_file in data_files:
            (path, filename) = os.path.split(data_file)
            (data_file_with_no_extension, extension) = os.path.splitext(data_file)
            par_file_original_URL = constant_undefined
            par_file_license_URL = constant_undefined
            par_author_name = constant_undefined
            par_license_name = constant_undefined
            file_languages = []
import os
import re
import common as common

# set output directory
current_dir = os.getcwd()
data_dir = os.path.join(current_dir, '..', 'data', 'segmented')
output_file = os.path.join(current_dir, '..', 'data', 'reference.txt')
print 'Current directory:\t', current_dir
print 'Data directory:\t\t', data_dir
print 'Output filename:\t', output_file

# get all wav file names
file_names = common.get_files(data_dir, '.wav')[0]
print 'File count:\t', len(file_names)
print 'Sample:\t\t', file_names[:8]

# generate references
numbers = ['nula', 'jedna ', 'dva', 'tři', 'čtyři', 'pět', 'šest',
           'sedum', 'osum', 'devět']
with open(output_file, 'w') as fw:
    fw.write('#!MLF!#\n')
    for file in file_names:
        matchObj = re.match(r'(\d)-(\d)-(\d)\.wav', file)
        index = int(matchObj.group(3))
        file_number = re.sub('\.wav$', '', file)
        fw.write('"*/%s.lab"\n%s\n\n' % (file_number, numbers[index]))
def find_tuxes(self):
    tuxes = common.get_files(constants.folder_award_tuxes,
                             constants.image_extensions)
    self.tuxes = common.randomize_list(tuxes)
def main():
    watchB = time.time()
    cp = configure()

    # Set the alarm in case if we go over time
    if cp.notimeout:
        log.debug("Running script with no timeout.")
    else:
        timeout = int(cp.get("Settings", "timeout"))
        signal.alarm(timeout)
        log.debug("Setting script timeout to %i." % timeout)

    # Hourly graphs (24-hours)
    watchS = time.time()
    hjds = HourlyJobsDataSource(cp)
    hjds.run()
    dg = DisplayGraph(cp, "jobs_hourly")
    jobs_data, hours_data = hjds.query_jobs()
    dg.data = [i/1000 for i in jobs_data]
    num_jobs = sum(jobs_data)
    dg.run("jobs_hourly")
    hjds.disconnect()
    log.debug("Time log - Hourly Jobs Query Time: %s", (time.time() - watchS))

    watchS = time.time()
    dg = DisplayGraph(cp, "hours_hourly")
    dg.data = [float(i)/1000. for i in hours_data]
    dg.run("hours_hourly")
    log.debug("Time log - Hourly Jobs Graph Time: %s", (time.time() - watchS))

    # Generate the more-complex transfers graph
    watchS = time.time()
    dst = DataSourceTransfers(cp)
    dst.run()
    log.debug("Time log - Hourly Transfer Query Time: %s", (time.time() - watchS))

    watchS = time.time()
    dg = DisplayGraph(cp, "transfer_volume_hourly")
    dg.data = [i[1]/1024./1024. for i in dst.get_data()]
    log.debug("Transfer volumes: %s" % ", ".join([str(float(i)) for i in dg.data]))
    dg.run("transfer_volume_hourly")
    transfer_data = dst.get_data()
    dg = DisplayGraph(cp, "transfers_hourly")
    dg.data = [long(i[0])/1000. for i in dst.get_data()]
    dg.run("transfers_hourly")
    num_transfers = sum([i[0] for i in transfer_data])
    transfer_volume_mb = sum([i[1] for i in transfer_data])
    dst.disconnect()
    log.debug("Time log - Hourly Transfer Graph Time: %s", (time.time() - watchS))

    # Daily (30-day graphs)
    watchS = time.time()
    dds = DailyDataSource(cp)
    dds.run()
    # Jobs graph
    jobs_data_daily, hours_data_daily = dds.query_jobs()
    dds.disconnect()
    log.debug("Time log - 30-Day Query Time: %s", (time.time() - watchS))

    # Job count graph
    watchS = time.time()
    dg = DisplayGraph(cp, "jobs_daily")
    dg.data = [float(i)/1000. for i in jobs_data_daily]
    num_jobs_hist = sum(jobs_data_daily)
    dg.run("jobs_daily", mode="daily")
    log.debug("Time log - 30-Day Count Graph Time: %s", (time.time() - watchS))

    # CPU Hours graph
    watchS = time.time()
    dg = DisplayGraph(cp, "hours_daily")
    dg.data = [float(i)/1000000. for i in hours_data_daily]
    num_hours_hist = sum(hours_data_daily)
    dg.run("hours_daily", mode="daily")
    log.debug("Time log - 30-Day CPU Graph Time: %s", (time.time() - watchS))

    # Transfers data
    watchS = time.time()
    transfer_data_daily, volume_data_daily = dds.query_transfers()
    log.debug("Time log - 30-Day Transfer Query Time: %s", (time.time() - watchS))

    # Transfer count graph
    watchS = time.time()
    dg = DisplayGraph(cp, "transfers_daily")
    dg.data = [float(i)/1000000. for i in transfer_data_daily]
    num_transfers_daily = sum(transfer_data_daily)
    dg.run("transfers_daily", mode="daily")
    log.debug("Time log - 30-Day Transfer Count Graph Time: %s", (time.time() - watchS))

    # Transfer volume graph
    watchS = time.time()
    dg = DisplayGraph(cp, "transfer_volume_daily")
    dg.data = [float(i)/1024.**3 for i in volume_data_daily]
    volume_transfers_hist = sum(volume_data_daily)
    dg.run("transfer_volume_daily", mode="daily")
    log.debug("Time log - 30-Day Transfer Volume Graph Time: %s", (time.time() - watchS))

    # Monthly graphs (12-months)
    watchS = time.time()
    mds = MonthlyDataSource(cp)
    mds.run()
    # Jobs graph
    jobs_data_monthly, hours_data_monthly = mds.query_jobs()
    mds.disconnect()
    log.debug("Time log - 12-Month Query Time: %s", (time.time() - watchS))

    # Job count graph
    watchS = time.time()
    dg = DisplayGraph(cp, "jobs_monthly")
    dg.data = [float(i)/1000000. for i in jobs_data_monthly]
    num_jobs_monthly = sum(jobs_data_monthly)
    dg.run("jobs_monthly", mode="monthly")
    log.debug("Time log - 12-Month Job Count Graph Time: %s", (time.time() - watchS))

    # Hours graph
    watchS = time.time()
    dg = DisplayGraph(cp, "hours_monthly")
    dg.data = [float(i)/1000000. for i in hours_data_monthly]
    num_hours_monthly = sum(hours_data_monthly)
    dg.run("hours_monthly", mode="monthly")
    log.debug("Time log - 12-Month Hour Graph Time: %s", (time.time() - watchS))

    # Transfers graph
    watchS = time.time()
    transfer_data_monthly, volume_data_monthly = mds.query_transfers()
    log.debug("Time log - 12-Month Transfer Query Time: %s", (time.time() - watchS))

    # Transfer count graph
    watchS = time.time()
    dg = DisplayGraph(cp, "transfers_monthly")
    dg.data = [float(i)/1000000. for i in transfer_data_monthly]
    num_transfers_monthly = sum(transfer_data_monthly)
    dg.run("transfers_monthly", mode="monthly")
    log.debug("Time log - 12-Month Transfer Count Graph Time: %s", (time.time() - watchS))

    # Transfer volume graph
    watchS = time.time()
    dg = DisplayGraph(cp, "transfer_volume_monthly")
    dg.data = [float(i)/1024.**3 for i in volume_data_monthly]
    volume_transfers_monthly = sum(volume_data_monthly)
    dg.run("transfer_volume_monthly", mode="monthly")
    log.debug("Time log - 12-Month Transfer Volume Graph Time: %s", (time.time() - watchS))

    # Pull OIM data
    watchS = time.time()
    ods = OIMDataSource(cp)
    num_sites = len(ods.query_sites())
    ces, ses = ods.query_ce_se()
    log.debug("Time log - OIM Time: %s", (time.time() - watchS))

    # Generate the JSON
    log.debug("Starting JSON creation")
    d = Data(cp)
    d.add_datasource(mds)
    d.add_datasource(hjds)
    d.add_datasource(dst)
    d.add_datasource(dds)
    d.add_datasource(ods)
    # Monthly data
    log.debug("Done creating JSON.")
    name, tmpname = get_files(cp, "json")
    fd = open(tmpname, 'w')
    d.run(fd)
    commit_files(name, tmpname)

    log.info("OSG Display done!")
    log.debug("Time log - Total Time: %s", (time.time() - watchB))
sys.stdout = fw
report(args)

# parameters for debugging
if DEBUG:
    argvals = 'mfcc data/segmented feats.hdf5 0.01 0.005 --win_func hamming'.split()
else:
    argvals = None

# parse arguments and print them
args = get_args(argvals)
report(args)

# collect files
file_names, file_paths = common.get_files(args.input_dir, '.wav', verbose=True)

# select window function for framing
if args.win_func == 'rectangular':
    win_func = lambda x: np.ones((x,))
elif args.win_func == 'hamming':
    win_func = lambda x: np.hamming(x)
elif args.win_func == 'hanning':
    win_func = lambda x: np.hanning(x)

# select feature function
if args.features == 'ste':
    feat_func = lambda x, y: fts.get_ste(x, y, args.frame_length,
                                         args.frame_step, win_func)
elif args.features == 'sti':
    feat_func = lambda x, y: fts.get_sti(x, y, args.frame_length,
                                         args.frame_step, win_func)
elif args.features == 'stzcr':