Example #1
def prepare_data(posfilpath, negfilepath, size, word2index):
    posfiles = common.get_files(posfilpath)
    negfiles = common.get_files(negfilepath)
    sz = min(2*len(posfiles), 2*len(negfiles), size)

    x_train = list()
    y_train = np.zeros((size, 1))

    for i in range(int(size/2)):
        posline = common.get_content(posfilpath + posfiles[i])
        k = 2*i
        if (posline != ""):
            sent_pos = posline.lower().split(" ")
            sent_ls = list()
            for word in sent_pos:
                if word in word2index:
                    sent_ls.append(word2index[word])
            x_train.append(list(set(sent_ls)))
            y_train[k][0] = 1  # label the positive example once, not per word
        
        negline = common.get_content(negfilepath + negfiles[i])

        if (negline != ""):
            sent_neg = negline.lower().split(" ")
            sent_ls = list()
            for word in sent_neg:
                if word in word2index:
                    sent_ls.append(word2index[word])
            x_train.append(list(set(sent_ls)))
    return (x_train, y_train, sz)
Example #2
def prepare_data(posfilpath, negfilepath, size, word2index):
    posfiles = common.get_files(posfilpath)
    negfiles = common.get_files(negfilepath)
    sz = min(2 * len(posfiles), 2 * len(negfiles), size)

    x_train = list()
    x_concat = list()

    for i in range(int(size / 2)):
        posline = common.get_content(posfilpath + posfiles[i])
        if (posline != ""):
            sent_pos = posline.lower().split(" ")
            sent_ls = list()
            for word in sent_pos:
                if word in word2index:
                    word_i = word2index[word]
                    sent_ls.append(word_i)
                    x_concat.append(word_i)
            x_train.append(sent_ls)

        negline = common.get_content(negfilepath + negfiles[i])

        if (negline != ""):
            sent_neg = negline.lower().split(" ")
            sent_ls = list()
            for word in sent_neg:
                if word in word2index:
                    word_i = word2index[word]
                    sent_ls.append(word_i)
                    x_concat.append(word_i)
            x_train.append(sent_ls)

    return (x_train, x_concat, sz)
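Both variants above assume a word2index mapping from token to integer id (Example #1 takes it as a parameter). A minimal, hypothetical usage sketch; the vocabulary, paths, and file layout below are illustrative, not from the source:

# Hypothetical usage sketch: build a toy vocabulary, then call prepare_data.
vocab = ["good", "bad", "great", "awful"]
word2index = {word: i for i, word in enumerate(vocab)}

# Assumes ./pos/ and ./neg/ each hold one review per file, as
# common.get_files / common.get_content expect.
x_train, y_train, sz = prepare_data("./pos/", "./neg/", 1000, word2index)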
Example #3
    def __init__(self, image_dir_1, image_dir_2, output_dir, index_file):
        self.image_dir_1 = image_dir_1
        self.img_files_1 = common.get_files(image_dir_1)

        self.image_dir_2 = image_dir_2
        self.img_files_2 = common.get_files(image_dir_2)

        print(len(self.img_files_1), len(self.img_files_2))

        self.output_dir = output_dir
        self.index_file = index_file
        return
Example #4
def file_scanner(numbers, to_servers):
    while True:

        print("process1 file_scanner", "-" * 20)
        print(to_servers)
        share_folder = os.getcwd() + "/share/"
        file_dic = get_files(share_folder)
        print(file_dic)

        for server in to_servers:
            # we will reconnect 5 times
            for i in range(5):
                try:
                    sleeptime = random.uniform(0.5, 1)
                    print('scan folder after:', round(sleeptime, 3),
                          ' seconds')
                    time.sleep(sleeptime)

                    # share_folder = os.getcwd() + "/share/"

                    # filename = "t2.txt"
                    # localfilename = share_folder + filename
                    # print(localfilename)
                    for localfilename, value in file_dic.items():
                        time.sleep(3)
                        print("@" * 10, "send localfile:", localfilename,
                              " => remote server")
                        sender_file(server, localfilename, value)
                    break
                except Exception as ex:
                    print("Unexpected error in file_scanner:",
                          sys.exc_info()[0], "ex=", ex)
            else:
                print("file_scanner: giving up after 5 failed attempts")
Example #5
def main(imagedir, sim=0.5):
    """Example main app using this library.

    Parameters
    ----------
    imagedir : str
        path to directory with images
    sim : float (0..1)
        similarity index (see imagecluster.cluster())
    """
    dbfn = pj(imagedir, ic_base_dir, 'fingerprints.pk')
    # print("dbfn= " + dbfn)
    if not os.path.exists(dbfn):
        os.makedirs(os.path.dirname(dbfn), exist_ok=True)
        print("no fingerprints database {} found".format(dbfn))
        files = co.get_files(imagedir)
        model = ic.get_model()
        print("running all images through NN model ...".format(dbfn))
        fps = ic.fingerprints(files, model, size=(224, 224))
        # print(fps)
        co.write_pk(fps, dbfn)
    else:
        print("loading fingerprints database {} ...".format(dbfn))
        fps = co.read_pk(dbfn)
    print("clustering ...")
    print(len(fps))

    clusters = ic.cluster(fps, sim)
    ic.make_links(clusters, pj(imagedir, ic_base_dir, 'clusters'))
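A possible invocation of this example app, assuming pj is os.path.join and ic_base_dir names the cache subdirectory (both come from the surrounding module in the source, not shown here):

# Hypothetical call: cluster everything under photos/ at 60% similarity.
# The first run caches fingerprints in photos/<ic_base_dir>/fingerprints.pk,
# so later runs with a different sim skip the slow NN pass.
main("photos/", sim=0.6)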
Example #6
def revert_files(src_dir):
    images = common.get_files(src_dir)

    for path, filename in images:
        os.rename(path, common.SAMPLES_DIR + filename)

    print("Reverted {} images.\n".format(len(images)))
Example #7
def main(_):
    model_graph = tf.Graph()
    with model_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(FLAGS.frozen_graph_path, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    with model_graph.as_default():
        with tf.Session(graph=model_graph) as sess:
            inputs = model_graph.get_tensor_by_name('image_tensor:0')
            classes = model_graph.get_tensor_by_name('classes:0')
            correct = 0
            files = common.get_files('./test')
            all_num = len(files)
            for f in files:
                image = Image.open(f)
                image = image.resize([INPUT_HEIGHT, INPUT_WIDTH])
                image = np.array(image, dtype=np.uint8)
                image_np = np.expand_dims(image, axis=0)
                predicted_label = sess.run(classes,
                                           feed_dict={inputs: image_np})
                if f.lower().count('true') > 0:
                    gt = 1
                else:
                    gt = 0
                if predicted_label[0] == gt:
                    correct += 1
                print('predict label {}  vs  gt  {} '.format(
                    predicted_label[0], gt))
            print('total {}  correct {} wrong {}  rate  {}'.format(
                all_num, correct, all_num - correct,
                float(correct) / all_num))
Example #8
def process_dataset(imagedir, modelname = 'ResNet50', input_size = 224):
    """
    processes a list of files (filenames) 
    
    1 - calculates sha256 hash and renames files to hash
    2 - crops out image from meme and copies into ./cropped/
    3 - calculates phash using the imagehash library
    4 - calculates dnn fingerprint using keras and tensorflow
    5 - does the same for cropped versions
    6 - applies a clustering algorithm on fingerprints of cropped images
    7 - plots all members of all clusters into a jpg file and saves results
    
    - returns a pandas dataframe with the information
    """
    files = co.get_files(imagedir)
    print("> Renaming {} files (to sha256 hash)".format(len(files)))
    files, hashes = co.rename_files(files, imagedir)
    print("done.")
    
    # create pandas dataframe containing all data
    df = pd.DataFrame(index=hashes)
    df['filename'] = files
    df['hash'] = hashes
    
    print("> Phashing {} files".format(len(files)))
    phashes = ph.return_phashes(files)
    df['phash'] = phashes
    print("done.")
    
    print("> Cropping and copying all images")
    df = co.crop_images(df, imagedir, input_size)
    print("done.")        
    
    print("> Loading Keras model {}".format(modelname))
    model, getFingerprint = ph.get_model(modelname=modelname)
    # construct fingerprint model (second to last layer)
    #getFingerprint = K.function([model.layers[0].input],
    #                              [model.layers[-2].output])
    
    print("done.")
    
    print("> Running images through DNN {}".format(modelname))
    # get fingerprints
    fps, preds, labels = ph.fingerprints(files, model, getFingerprint, size=(input_size,input_size), modelname=modelname)
    df['fingerprints'] = fps
    df['labels'] = labels
    
    print("> Running CROPPED images through DNN {}".format(modelname))
    # get fingerprints
    cfps, cpreds, clabels = ph.fingerprints(files, model, getFingerprint, size=(input_size,input_size), modelname=modelname)
    df['cropped_fingerprints'] = cfps
    df['cropped_labels'] = clabels
    
    print("done.")
    
    return df
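A sketch of how process_dataset might be driven and its result inspected; the directory name is hypothetical, and the columns shown are the ones the function fills in above:

# Hypothetical usage: fingerprint a folder of memes and peek at the result.
df = process_dataset("memes/", modelname="ResNet50", input_size=224)
print(df[["filename", "phash", "labels"]].head())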
Example #9
def main(training_set_ratio):
    arrows = pd.DataFrame(np.zeros((3, 4), dtype=np.int32),
                          index=('hollow', 'full', 'thin'),
                          columns=('down', 'left', 'right', 'up'))

    images = common.get_files(common.SAMPLES_DIR)

    if images:
        for _, filename in images:
            arrow_direction, arrow_type = common.arrow_labels(filename)

            arrows[arrow_direction][arrow_type] += 1

        num_samples = int(arrows.min().min() * training_set_ratio)

        print("Samples per type: {}".format(num_samples * 4))

        for t, _ in arrows.iterrows():
            print("\nProcessing {} arrows...".format(t))

            for d in arrows:
                candidates = [(p, f) for p, f in images
                              if common.arrow_labels(f) == (d, t)]

                print("{}: {}".format(d, len(candidates)))

                training = random.sample(candidates, num_samples)
                for path, filename in training:
                    dst_dir = common.TRAINING_DIR + d + "/"
                    if not os.path.exists(dst_dir):
                        os.makedirs(dst_dir)

                    os.rename(path, dst_dir + filename)

                candidates = [c for c in candidates if c not in training]

                validation = random.sample(
                    candidates, int(len(candidates) * VALIDATION_SET_RATIO))
                for path, filename in validation:
                    dst_dir = common.VALIDATION_DIR + d + "/"
                    if not os.path.exists(dst_dir):
                        os.makedirs(dst_dir)

                    os.rename(path, dst_dir + filename)

                testing = [c for c in candidates if c not in validation]
                for path, filename in testing:
                    dst_dir = common.TESTING_DIR + d + "/"
                    if not os.path.exists(dst_dir):
                        os.makedirs(dst_dir)

                    os.rename(path, dst_dir + filename)

    show_summary()

    print("\nFinished!")
Example #10
    def __init__(self, root_dir, image_dir, label_file, index_file, copy_dir, copy_file):
        self.img_files = common.get_files(os.path.join(root_dir, image_dir))
        self.image_dir = image_dir
        self.label_file = label_file
        self.car_points = []
        self.index_file = index_file
        self.copy_dir = copy_dir
        self.copy_file = copy_file

        print("[len] ", len(self.img_files))
        return
Example #11
def main():
    common.create_directories()

    print("         Q = ignore image")
    print("         1 = label as round")
    print("         2 = label as wide")
    print("         3 = label as narrow")
    print("ARROW KEYS = label directions\n")

    global type_label
    global direction_label
    global plt_text

    unlabeled_imgs = common.get_files(common.SCREENSHOTS_DIR)

    num_labeled = 0
    for path, filename in unlabeled_imgs:
        print("Processing {}...".format(filename))

        img = plt.imread(path)

        ax = plt.gca()
        fig = plt.gcf()
        plot = ax.imshow(img)

        plt.axis('off')
        plt.tight_layout()
        plt_text = plt.text(0, 0, "")

        fig.canvas.mpl_connect('key_press_event', on_press)

        mng = plt.get_current_fig_manager()
        mng.window.state('zoomed')

        plt.show()

        if type_label and direction_label:
            dst_filename = "{}_{}_{}.png".format(
                type_dictionary[type_label], direction_label, time.strftime("%Y%m%d-%H%M%S"))

            os.rename(path, common.LABELED_DIR + dst_filename)

            direction_label = ''
            type_label = None

            num_labeled += 1

    if len(unlabeled_imgs) > 0:
        print("\nLabeled {} out of {} images ({}%).".format(
            num_labeled, len(unlabeled_imgs), 100 * num_labeled // len(unlabeled_imgs)))
        print("Finished!")
    else:
        print("\nThere are no images to label.")
Example #12
def get_fp(imagedir,ic_base_dir = 'imagecluster'):

    dbfn = pj(ic_base_dir, 'fingerprints.pk')
    if os.path.exists(os.path.dirname(dbfn)):
        shutil.rmtree(os.path.dirname(dbfn))
    os.makedirs(os.path.dirname(dbfn))
    print("rebuilding fingerprints database {}".format(dbfn))
    files = co.get_files(imagedir)
    model = ic.get_model()
    print("running all images through NN model ...")
    fps = ic.fingerprints(files, model, size=(224, 224))
    co.write_pk(fps, dbfn)
Example #13
def get_pipeline_files(raw_dir, extracted_dir, parsed_dir, standardized_dir):
    """
    yields file names corresponding to the raw, extracted, parsed, standardized
    intermediate steps of the pipeline
    """
    suffix = ".json"
    for raw_file in [f for f in get_files(raw_dir) if identify_file(f)]:
        filestem = get_filename_without_extension(raw_file)
        extracted_file = os.path.join(extracted_dir, filestem + suffix)
        parsed_file = os.path.join(parsed_dir, filestem + suffix)
        standardized_file = os.path.join(standardized_dir, filestem + suffix)
        yield raw_file, extracted_file, parsed_file, standardized_file
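Since get_pipeline_files is a generator, callers typically drive it in a for loop. A minimal consumption sketch with hypothetical directory names:

# Hypothetical usage: walk the pipeline stages for each raw input file.
for raw, extracted, parsed, standardized in get_pipeline_files(
        "data/raw", "data/extracted", "data/parsed", "data/standardized"):
    print(raw, "->", standardized)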
Example #14
    def __init__(self, image_dir):
        self.img = None
        self.img_files = common.get_files(image_dir)
        print('total imgs len: ', len(self.img_files))
        self.image_dir = image_dir
        self.plate_encode = "utf8"
        self.label_normal_file = os.path.join('.', 'label_normal.txt')
        self.label_test_file = os.path.join('.', 'label_test.txt')
        self.label_error_file = os.path.join('.', 'label_error.txt')
        self.index_file = os.path.join('.', 'index.txt')

        return
Example #15
def main(training_set_ratio):
    common.create_directories()

    arrows = pd.DataFrame(np.zeros((3, 4), dtype=np.int32),
                          index=('round', 'wide', 'narrow'),
                          columns=('down', 'left', 'right', 'up'))

    images = [(p, f) for p, f in common.get_files(common.SAMPLES_DIR)
              if f[-5] != 'F']

    if images:
        for _, filename in images:
            arrow_direction, arrow_type = common.arrow_labels(filename)

            arrows[arrow_direction][arrow_type] += 1

        num_samples = int(arrows.min().min() * training_set_ratio)

        print("Samples per type: {}".format(num_samples * 4))

        for t, _ in arrows.iterrows():
            print("\nProcessing {} arrows...".format(t))

            for direction in arrows:
                candidates = [(p, f) for p, f in images
                              if common.arrow_labels(f) == (direction, t)]

                print("{}: {}".format(direction, len(candidates)))

                training = random.sample(candidates, num_samples)
                for path, filename in training:
                    dst_dir = common.TRAINING_DIR + direction + '/'
                    os.rename(path, dst_dir + filename)
                    os.rename(flipped(path), dst_dir + flipped(filename))

                candidates = [c for c in candidates if c not in training]

                validation = random.sample(
                    candidates, int(len(candidates) * VALIDATION_SET_RATIO))
                for path, filename in validation:
                    dst_dir = common.VALIDATION_DIR + direction + '/'
                    os.rename(path, dst_dir + filename)
                    os.rename(flipped(path), dst_dir + flipped(filename))

                testing = [c for c in candidates if c not in validation]
                for path, filename in testing:
                    dst_dir = common.TESTING_DIR + direction + '/'
                    os.rename(path, dst_dir + filename)
                    os.rename(flipped(path), dst_dir + flipped(filename))

    show_summary()

    print("\nFinished!")
Example #16
def prepare_data(posfilpath, negfilepath, size):
    posfiles = common.get_files(posfilpath)
    negfiles = common.get_files(negfilepath)

    reviews = list()
    raw_sent = list()
    for i in range(int(size / 2)):
        posline = common.get_content(posfilpath + posfiles[i])
        if (posline != ""):
            posline = posline.lower()
            sent = posline.split(" ")
            reviews.append(makesentvec(sent))
            raw_sent.append(posline)

        negline = common.get_content(negfilepath + negfiles[i])

        if (negline != ""):
            negline = negline.lower()
            sent = negline.split(" ")
            reviews.append(makesentvec(sent))
            raw_sent.append(negline)

    return (reviews, raw_sent)
Example #17
def main():
    """User interface."""

    parser = argparse.ArgumentParser(
        description='Helper script to split MIDI files into '
        'shorter sequences by a fixed duration.')
    parser.add_argument('files',
                        metavar='path',
                        nargs='+',
                        help='path of input files (.mid). '
                        'accepts * as wildcard')
    parser.add_argument('--target_folder',
                        metavar='path',
                        help='folder path where '
                        'generated results are stored',
                        default=common.DEFAULT_TARGET_FOLDER)
    parser.add_argument('--duration',
                        metavar='seconds',
                        type=int,
                        help='duration of every slice in seconds',
                        choices=range(1, 60 * 60),
                        default=DEFAULT_DURATION)

    args = parser.parse_args()

    file_paths = common.get_files(args.files)

    target_folder_path = args.target_folder
    duration = args.duration

    common.check_target_folder(target_folder_path)

    for file_path in file_paths:
        if common.is_invalid_file(file_path):
            continue

        # Read MIDI file and clean up
        score = midi.PrettyMIDI(file_path)
        score.remove_invalid_notes()
        print('➜ Loaded "{}".'.format(file_path))

        # Split MIDI file!
        splits = split_score(score, duration)

        # Generate MIDI files from splits
        generate_files(file_path, target_folder_path, splits)

        print('')

    print('Done!')
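Assuming this main lives in a script (the file name below is illustrative, not from the source), the argparse setup above implies invocations like:

# Hypothetical command lines for the argparse definition above:
#   python split_midi.py songs/*.mid --duration 30
#   python split_midi.py take1.mid take2.mid --target_folder out/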
Example #18
	def append_from_folder(self, folder) :
		# recursively appends all the XML game files
		# which are found inside a folder
		xml_files = common.get_files(folder, ["xml"])		
		
		for xml_file in xml_files :

			try :
				games = common.Game_set(self.globalvars, xml_file)
			
				self.games_list.append(games)

			except Exception as e:
				common.error("Could not cope with " + xml_file)
				common.error("Problem in XML file reading ", e, traceback.format_exc())
Example #19
0
    def append_from_folder(self, folder):
        # recursively appends all the XML game files
        # which are found inside a folder
        xml_files = common.get_files(folder, ["xml"])

        for xml_file in xml_files:

            try:
                games = common.Game_set(self.globalvars, xml_file)

                self.games_list.append(games)

            except Exception as e:
                common.error("Could not cope with " + xml_file)
                common.error("Problem in XML file reading ", e,
                             traceback.format_exc())
Example #20
def main(imagedir, processingDir, similarity=.4):

    imageFeaturePath = pathJoin(processingDir, 'imagefeatures.pk')
    if not os.path.exists(imageFeaturePath):
        common.makeDir(os.path.dirname(imageFeaturePath))
        print("No imagefeatures database {} found".format(imageFeaturePath))
        files = common.get_files(imagedir)
        model = imagecluster.get_model()
        fps = imagecluster.fingerprints(files, model, size=(224, 224))
        common.write_pk(fps, imageFeaturePath)
    else:
        print("loading fingerprints database {} ...".format(imageFeaturePath))
        fps = common.read_pk(imageFeaturePath)
    print("clustering ...")
    imagecluster.make_links(imagecluster.cluster(fps, similarity),
                            pathJoin(imagedir, processingDir, 'clusters'))
Example #21
    def save_cache(self):
        now = datetime.datetime.now()
        old_keys = []
        for key in self.data.keys():
            if (now - key).days >= 7:
                old_keys.append(key)
        for key in old_keys:
            del self.data[key]
        try:
            name, tmpname = get_files(self.cp, "transfer_data")
            fp = open(tmpname, 'w')
            pickle.dump(self.data, fp)
            fp.close()
            commit_files(name, tmpname)
            log.debug("Saved data to cache.")
        except Exception as e:
            log.warning("Unable to write cache; message: %s" % str(e))
Example #23
def show_summary():
    matrix = pd.DataFrame(np.zeros((4, 5), dtype=np.int32), index=(
        'hollow', 'full', 'thin', 'total'), columns=('down', 'left', 'right', 'up', 'total'))

    images = common.get_files(common.SAMPLES_DIR)

    for _, filename in images:
        arrow_direction, arrow_type = common.arrow_labels(filename)

        matrix[arrow_direction][arrow_type] += 1

        matrix['total'][arrow_type] += 1
        matrix[arrow_direction]['total'] += 1
        matrix['total']['total'] += 1

    print(cf.salmon("Samples summary"))
    print(matrix, "\n")
Example #24
def indent(directory,
           file_ext,
           spaces=4,
           padding=12,
           left_justify=False,
           recursive=False,
           overwrite=False,
           verbose=False):
    """
        Method to perform the indentation process.
    """
    pv.path(directory, "input", False, True)
    pv.string(file_ext, "file extension", False, None)
    pv.intvalue(spaces, "spaces", True, False, False)
    pv.intvalue(padding, "padding", True, False, False)

    directory = os.path.abspath(directory)
    spaces = int(spaces)
    padding = int(padding)
    num = 1

    if verbose:
        print "\nGathering files to process. Please wait.\n"

    list_files = common.get_files(directory, file_ext, recursive)

    if len(list_files) == 0:
        if verbose:
            print "No files to process.\n"
        return

    just = len(str(len(list_files)))
    for file_input in list_files:
        if verbose:
            print "Processing file %s of %s: '%s'" % \
                (str(num).rjust(just, " "), str(len(list_files)), file_input)
            num += 1

        if overwrite:
            __indent_file(file_input, spaces, padding, left_justify)
        else:
            __indent_copy(file_input, spaces, padding, left_justify)

    if verbose:
        print "\nFinished.\n"
Example #25
def generate_record(images_path, output_path):
    writer = tf.python_io.TFRecordWriter(output_path)
    files = common.get_files(images_path, '*.jpg')
    for f in files:
        print(os.path.basename(f))
        if f.lower().count('true') > 0:
            label = 1
        else:
            label = 0
        shape, binary_image = get_image_binary(f)
        example = tf.train.Example(features=tf.train.Features(feature={
            'image/label': int64_feature(label),
            'image/height': int64_feature(shape[0]),
            'image/width': int64_feature(shape[1]),
            'image/channel': int64_feature(shape[2]),
            'image/encoded': bytes_feature(binary_image)
        }))
        writer.write(example.SerializeToString())
    writer.close()
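The records written above can be read back with the matching TF 1.x iterator. A minimal read-back sketch; the file name is hypothetical, and the feature keys mirror those used in the writer:

# Hypothetical read-back of a TFRecord file produced by generate_record (TF 1.x API).
import tensorflow as tf

for record in tf.python_io.tf_record_iterator("train.tfrecord"):
    example = tf.train.Example.FromString(record)
    label = example.features.feature['image/label'].int64_list.value[0]
    height = example.features.feature['image/height'].int64_list.value[0]
    print(label, height)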
Example #26
    def run(self, sect, mode="hourly"):

        self.mode = mode
        if self.mode == "monthly":
            self.num_points = 12
        elif self.mode == "daily":
            self.num_points = 31

        self.parse_data()

        for format in self.format:
            name, tmpname = get_files(self.cp, sect, format=format)
            self.build_canvas(format=format)
            self.draw()
            fd = open(tmpname, 'w')
            self.file = fd
            self.write_graph(format=format)
            fd.flush()
            os.fsync(fd)
            commit_files(name, tmpname)
Example #27
def generate_record(images_path, output_path):
    writer = tf.python_io.TFRecordWriter(output_path)
    files = common.get_files(images_path, '*.jpeg')
    labels = common.get_sub_directory_name(images_path)
    for f in files:
        label = common.get_parent_dir(f)
        # string label to int
        label = labels.index(label)
        print('label  {}  image  {} '.format(label, f))
        shape, binary_image = get_image_binary(f)
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/label': int64_feature(label),
                'image/height': int64_feature(shape[0]),
                'image/width': int64_feature(shape[1]),
                'image/channel': int64_feature(shape[2]),
                'image/encoded': bytes_feature(binary_image)
            }))
        writer.write(example.SerializeToString())
    writer.close()
Example #28
def run(data_dir):
    """
    Run the pipeline, intermediate files go into
    data/extracted, data/parsed, and data/standardized
    which is ingested into ./expenses.db
    """

    cores = mp.cpu_count()
    pool = mp.Pool(cores)
    jobs = []

    raw_dir = os.path.join(data_dir, "raw")
    extracted_dir = os.path.join(data_dir, "extracted")
    parsed_dir = os.path.join(data_dir, "parsed")
    standardized_dir = os.path.join(data_dir, "standardized")

    if len(os.listdir(raw_dir)) == 0:
        return False

    make_dirs([extracted_dir, parsed_dir, standardized_dir])

    with tempfile.TemporaryDirectory() as tmp_standardized_dir:
        for raw, extracted, parsed, standardized in get_pipeline_files(
                raw_dir, extracted_dir, parsed_dir, tmp_standardized_dir):
            jobs.append(
                pool.apply_async(_etl, (raw, extracted, parsed, standardized)))

        [job.get() for job in jobs]
        # TODO: hardcoded expenses tablename and expenses.db
        ingest(
            get_files(tmp_standardized_dir),
            "expenses",
            os.path.join(data_dir, "expenses.db"),
        )
        for file_ in os.listdir(tmp_standardized_dir):
            os.replace(
                os.path.join(tmp_standardized_dir, file_),
                os.path.join(standardized_dir, file_),
            )
    return True
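A sketch of driving this pipeline, assuming a data/ directory whose raw/ subfolder is populated as the docstring describes:

# Hypothetical usage: run the ETL over data/raw and report the outcome.
if run("data"):
    print("pipeline finished; results ingested into data/expenses.db")
else:
    print("no raw files found in data/raw")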
Example #29
def replace(directory,
            file_ext,
            mode,
            spaces=8,
            recursive=False,
            overwrite=False,
            verbose=True):
    """
        Method to perform the replacement process.
    """
    pv.path(directory, "input", False, True)
    pv.string(file_ext, "file extension", False, None)
    mode = mode.lower()
    pv.compstr(mode, "mode", ["spaces", "tabs"])
    pv.intvalue(spaces, "spaces", True, False, False)

    directory = os.path.abspath(directory)
    spaces = int(spaces)
    num = 1

    if verbose:
        print "\nGathering files to process. Please wait.\n"

    list_files = common.get_files(directory, file_ext, recursive)
    just = len(str(len(list_files)))
    for file_input in list_files:
        if verbose:
            print "Processing file %s of %s: '%s'" % \
                (str(num).rjust(just, " "), str(len(list_files)), file_input)
            num += 1

        if overwrite:
            __replace_file(file_input, mode, spaces)
        else:
            __replace_copy(file_input, mode, spaces)

    if verbose:
        print "\nFinished.\n"
Example #30
total_multimedia_files = 0.0

total_multimedia_files_with_lic_info = 0.0


mediafiles = MediaFiles()
activities = Activities()


for data_folder in constants.data_folders :

	os.chdir(os.path.join(original_path, data_folder))

	for media_type in media_types :

		data_files = common.get_files(".", extensions[media_type])

		for data_file in data_files :

			(path, filename) = os.path.split(data_file)

			(data_file_with_no_extension, extension) = os.path.splitext(data_file)


			par_file_original_URL = constant_undefined
			par_file_license_URL = constant_undefined
			par_author_name = constant_undefined
			par_license_name = constant_undefined

			file_languages = []
Example #31
import os
import re

import common as common

# set output directory
current_dir = os.getcwd()
data_dir = os.path.join(current_dir, '..', 'data', 'segmented')
output_file = os.path.join(current_dir, '..', 'data', 'reference.txt')

print 'Current directory:\t', current_dir
print 'Data directory:\t\t', data_dir
print 'Output filename:\t', output_file

# get all wav file names
file_names = common.get_files(data_dir, '.wav')[0]

print 'File count:\t', len(file_names)
print 'Sample:\t\t', file_names[:8]

#  generate references
numbers = ['nula', 'jedna ', 'dva', 'tři', 'čtyři', 'pět', 'šest', 'sedum', 'osum', 'devět']

with open(output_file, 'w') as fw:
    fw.write('#!MLF!#\n')

    for file in file_names:
        matchObj = re.match(r'(\d)-(\d)-(\d)\.wav', file)
        index = int(matchObj.group(3))
        file_number = re.sub('\.wav$', '', file)
        fw.write('"*/%s.lab"\n%s\n\n' % (file_number, numbers[index]))
Example #32
	def find_tuxes(self) :

		tuxes = common.get_files(constants.folder_award_tuxes, constants.image_extensions)
		
		self.tuxes = common.randomize_list(tuxes)
Example #33
def main():
    watchB=time.time()
    cp = configure()

    # Set the alarm in case if we go over time
    if cp.notimeout:
        log.debug("Running script with no timeout.")
    else:
        timeout = int(cp.get("Settings", "timeout"))
        signal.alarm(timeout)
        log.debug("Setting script timeout to %i." % timeout)

    # Hourly graphs (24-hours)
    watchS=time.time()
    hjds = HourlyJobsDataSource(cp)
    hjds.run()
    dg = DisplayGraph(cp, "jobs_hourly")
    jobs_data, hours_data = hjds.query_jobs()
    dg.data = [i/1000 for i in jobs_data]
    num_jobs = sum(jobs_data)
    dg.run("jobs_hourly")
    hjds.disconnect()
    log.debug("Time log - Hourly Jobs Query Time: %s", (time.time() - watchS))
    watchS=time.time()
    dg = DisplayGraph(cp, "hours_hourly")
    dg.data = [float(i)/1000. for i in hours_data]
    dg.run("hours_hourly")
    log.debug("Time log - Hourly Jobs Graph Time: %s", (time.time() - watchS))
    # Generate the more-complex transfers graph

    watchS=time.time()
    dst = DataSourceTransfers(cp)
    dst.run()
    log.debug("Time log - Hourly Transfer Query Time: %s", (time.time() - watchS))
    watchS=time.time()
    dg = DisplayGraph(cp, "transfer_volume_hourly")
    dg.data = [i[1]/1024./1024. for i in dst.get_data()]
    log.debug("Transfer volumes: %s" % ", ".join([str(float(i)) for i in \
        dg.data]))
    dg.run("transfer_volume_hourly")
    transfer_data = dst.get_data()
    dg = DisplayGraph(cp, "transfers_hourly")
    dg.data = [long(i[0])/1000. for i in dst.get_data()]
    dg.run("transfers_hourly")
    num_transfers = sum([i[0] for i in transfer_data])
    transfer_volume_mb = sum([i[1] for i in transfer_data])
    dst.disconnect()
    log.debug("Time log - Hourly Transfer Graph Time: %s", (time.time() - watchS))

    # Daily (30-day graphs)
    watchS=time.time()
    dds = DailyDataSource(cp)
    dds.run()
    # Jobs graph
    jobs_data_daily, hours_data_daily = dds.query_jobs()
    dds.disconnect() 
    log.debug("Time log - 30-Day Query Time: %s", (time.time() - watchS))
    # Job count graph
    watchS=time.time()
    dg = DisplayGraph(cp, "jobs_daily")
    dg.data = [float(i)/1000. for i in jobs_data_daily]
    num_jobs_hist = sum(jobs_data_daily)
    dg.run("jobs_daily", mode="daily")
    log.debug("Time log - 30-Day Count Graph Time: %s", (time.time() - watchS))
    # CPU Hours graph
    watchS=time.time()
    dg = DisplayGraph(cp, "hours_daily")
    dg.data = [float(i)/1000000. for i in hours_data_daily]
    num_hours_hist = sum(hours_data_daily) 
    dg.run("hours_daily", mode="daily")
    log.debug("Time log - 30-Day CPU Graph Time: %s", (time.time() - watchS))
    # Transfers data
    watchS=time.time()
    transfer_data_daily, volume_data_daily = dds.query_transfers()
    log.debug("Time log - 30-Day Transfer Query Time: %s", (time.time() - watchS))
    # Transfer count graph
    watchS=time.time()
    dg = DisplayGraph(cp, "transfers_daily")
    dg.data = [float(i)/1000000. for i in transfer_data_daily]
    num_transfers_daily = sum(transfer_data_daily)
    dg.run("transfers_daily", mode="daily")
    log.debug("Time log - 30-Day Transfer Count Graph Time: %s", (time.time() - watchS))
    # Transfer volume graph 
    watchS=time.time()
    dg = DisplayGraph(cp, "transfer_volume_daily")
    dg.data = [float(i)/1024.**3 for i in volume_data_daily]
    volume_transfers_hist = sum(volume_data_daily)
    dg.run("transfer_volume_daily", mode="daily")
    log.debug("Time log - 30-Day Transfer Volume Graph Time: %s", (time.time() - watchS))

    # Monthly graphs (12-months)
    watchS=time.time()
    mds = MonthlyDataSource(cp)
    mds.run()
    # Jobs graph
    jobs_data_monthly, hours_data_monthly = mds.query_jobs()
    mds.disconnect()
    log.debug("Time log - 12-Month Query Time: %s", (time.time() - watchS))
    # Job count graph
    watchS=time.time()
    dg = DisplayGraph(cp, "jobs_monthly")
    dg.data = [float(i)/1000000. for i in jobs_data_monthly]
    num_jobs_monthly = sum(jobs_data_monthly)
    dg.run("jobs_monthly", mode="monthly")
    log.debug("Time log - 12-Month Job Count Graph Time: %s", (time.time() - watchS))
    # Hours graph
    watchS=time.time()
    dg = DisplayGraph(cp, "hours_monthly")
    dg.data = [float(i)/1000000. for i in hours_data_monthly]
    num_hours_monthly = sum(hours_data_monthly)
    dg.run("hours_monthly", mode="monthly")
    log.debug("Time log - 12-Month Hour Graph Time: %s", (time.time() - watchS))
    # Transfers graph
    watchS=time.time()
    transfer_data_monthly, volume_data_monthly = mds.query_transfers()
    log.debug("Time log - 12-Month Transfer Query Time: %s", (time.time() - watchS))
    # Transfer count graph
    watchS=time.time()
    dg = DisplayGraph(cp, "transfers_monthly")
    dg.data = [float(i)/1000000. for i in transfer_data_monthly]
    num_transfers_monthly = sum(transfer_data_monthly)
    dg.run("transfers_monthly", mode="monthly")
    log.debug("Time log - 12-Month Transfer Count Graph Time: %s", (time.time() - watchS))
    # Transfer volume graph
    watchS=time.time()
    dg = DisplayGraph(cp, "transfer_volume_monthly")
    dg.data = [float(i)/1024.**3 for i in volume_data_monthly]
    volume_transfers_monthly = sum(volume_data_monthly)
    dg.run("transfer_volume_monthly", mode="monthly")
    log.debug("Time log - 12-Month Transfer Volume Graph Time: %s", (time.time() - watchS))
    # Pull OIM data
    watchS=time.time()
    ods = OIMDataSource(cp)
    num_sites = len(ods.query_sites())
    ces, ses = ods.query_ce_se()
    log.debug("Time log - OIM Time: %s", (time.time() - watchS))

    # Generate the JSON
    log.debug("Starting JSON creation")
    d = Data(cp)
    d.add_datasource(mds)
    d.add_datasource(hjds)
    d.add_datasource(dst)
    d.add_datasource(dds)
    d.add_datasource(ods)
    # Monthly data
    log.debug("Done creating JSON.")

    name, tmpname = get_files(cp, "json")
    fd = open(tmpname, 'w')
    d.run(fd)
    commit_files(name, tmpname)

    log.info("OSG Display done!")
    log.debug("Time log - Total Time: %s", (time.time() - watchB))
Example #34
        sys.stdout = fw
        report(args)


# parameters for debugging
if DEBUG:
    argvals = 'mfcc data/segmented feats.hdf5 0.01 0.005 --win_func hamming'.split()
else:
    argvals = None

# parse arguments and print them
args = get_args(argvals)
report(args)

# collect files
file_names, file_paths = common.get_files(args.input_dir, '.wav', verbose=True)

# select window function for framing
if args.win_func == 'rectangular':
    win_func = lambda x: np.ones((x,))
elif args.win_func == 'hamming':
    win_func = lambda x: np.hamming(x)
elif args.win_func == 'hanning':
    win_func = lambda x: np.hanning(x)

# select feature function
if args.features == 'ste':
    feat_func = lambda x, y: fts.get_ste(x, y, args.frame_length, args.frame_step, win_func)
elif args.features == 'sti':
    feat_func = lambda x, y: fts.get_sti(x, y, args.frame_length, args.frame_step, win_func)
elif args.features == 'stzcr':