Example #1
    def __init__(self, greyscale=False, n_item=None, resize=None, frame2=False, shiny=False, label=False):
        self.dir = os.path.join(os.environ["DATASETS"], 'pokemon_sprites/emerald/')
        self.dir_shiny = os.path.join(self.dir, 'shiny/')
        self.dir_frame2 = os.path.join(self.dir, 'frame2/')

        self.greyscale = greyscale

        self.images = ut.list_files(os.path.join(self.dir, "*"))
        self.images_shiny = ut.list_files(os.path.join(self.dir_shiny, "*"))
        self.images_frame2 = ut.list_files(os.path.join(self.dir_frame2, "*"))

        self.images += self.images_shiny if shiny else []
        self.images += self.images_frame2 if frame2 else []

        if not resize:
            resize = [64, 64]

        self.general_transform = transforms.Compose([
                            transforms.Resize(resize),
                            transforms.ToTensor()
        ])
        if n_item:
            self.images = np.random.choice(self.images, size=(n_item, )).tolist() * (1000//n_item)

        self.path_to_label = None
        self.label_map = None
        self.load_labels('type1', frame2, shiny)
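The class around this __init__ is not shown; assuming it is a torch.utils.data.Dataset subclass (the name EmeraldSprites below is hypothetical) and that the DATASETS environment variable points at a folder containing pokemon_sprites/emerald/, usage might look like:

# Hypothetical usage; the class name and __getitem__ behavior are assumptions.
os.environ.setdefault("DATASETS", "/data")  # must contain pokemon_sprites/emerald/
ds = EmeraldSprites(n_item=10, resize=[64, 64], shiny=True, frame2=True)
print(len(ds.images))  # paths gathered by ut.list_files, incl. shiny/ and frame2/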
Example #2
 def lp_list(self, config, topos=False, layouts=False):
     """ list module of linchpin  """
     if topos and layouts:
         t_files = list_files(config.clipath+"/ex_topo")
         l_files = list_files(config.clipath+"/inventory_layouts")
         return (t_files, l_files)
     if topos:
         t_files = list_files(config.clipath+"/ex_topo")
         return t_files
     if layouts:
         l_files = list_files(config.clipath+"/inventory_layouts")
         return l_files
Example #3
def random_style_transfer(in_path='images/', out_path='processed_images/', checkpoint_path='checkpoints', allow_different_dimensions=True, batch_size=1, device='/gpu:0'):
    checkpoints = list_files(checkpoint_path)
    files = [fname for fname in list_files(in_path) if fname not in list_files(out_path)]  # skip already-processed images
    fullprocess = [(os.path.join(in_path,x),os.path.join(out_path,x),f'{checkpoint_path}/{random.sample(checkpoints,1)[0]}') for x in files]
    for tup in fullprocess:
        print(tup)
        if allow_different_dimensions:
            ffwd_different_dimensions([tup[0]], [tup[1]], tup[2], 
                    device_t=device, batch_size=batch_size)
        else:
            ffwd([tup[0]], [tup[1]], tup[2], device_t=device,
                    batch_size=batch_size)
Example #4
 def lp_list(self, config, topos=False, layouts=False):
     """ list module of linchpin  """
     if topos and layouts:
         t_files = list_files(config.clipath + "/ex_topo")
         l_files = list_files(config.clipath + "/inventory_layouts")
         return (t_files, l_files)
     if topos:
         t_files = list_files(config.clipath + "/ex_topo")
         return t_files
     if layouts:
         l_files = list_files(config.clipath + "/inventory_layouts")
         return l_files
Example #5
def plot_random_patches(patches_path, n_patches, classes, class_names):
    """ plot random patches with ground truth
    
    arguments
    ---------
        patches_path: string
            path to folder containing the patches 
        n_patches: int
            number of random patches to plot
        classes: list
            list of predicted classes
        class_names: list
            list of class names, used to label the legend
    output
    ------
        figure with n_patches plotted in first row and ground truth in second row.
    """
    
    images_path = patches_path + 'images/'
    labels_path = patches_path + 'labels/'
    
    rows = 2
    cols = n_patches    
    
    
    # get path images list
    im_list = list_files(images_path, '.npy')
    gt_list = list_files(labels_path, '.npy')
    
    # prepare figure: RGB band indices and one row each for image and ground truth
    index = np.array([0, 1, 2])
    fig, ax = plt.subplots(rows, cols)
    
    for i in range(n_patches):
        idx = np.random.randint(len(im_list))
        im = np.load(images_path + im_list[idx])
        gt = np.load(labels_path + gt_list[idx])
        
        # prepare RGB plot
        plt_im = im[:, :, index].astype(np.float64)
        
        # prepare gt plot: collapse one-hot ground truth to class indices
        plt_gt = np.argmax(gt, axis=2)
    
        # plot training image
        image = ax[0,i].imshow(plt_im)
        
        # plot gt 
        grtr = ax[1,i].imshow(plt_gt, cmap=cmap, vmin=0, vmax=4) #colors not right
    
    ep.draw_legend(grtr,titles=class_names,classes=classes)
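A hypothetical call; it assumes patches_path contains images/ and labels/ subfolders of matching .npy arrays, and that cmap and ep (e.g. earthpy.plot) are defined at module level, as the body above requires:

plot_random_patches(patches_path='patches/',
                    n_patches=4,
                    classes=[0, 1, 2, 3, 4],
                    class_names=['class_0', 'class_1', 'class_2',
                                 'class_3', 'class_4'])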
Example #6
def load_config(config_directory):
    global global_config, profiles, transproxies

    global_config = yaml.safe_load(
        open(os.path.join(config_directory, "global.yaml")).read())

    profiles = []
    for path in utils.list_files(os.path.join(config_directory, "profiles")):
        profiles.append(yaml.safe_load(open(path).read()))

    get_filename = lambda path: os.path.splitext(os.path.basename(path))[0]
    transproxies = {}
    for path in utils.list_files(os.path.join(config_directory,
                                              "transproxies")):
        transproxies[get_filename(path)] = yaml.safe_load(open(path).read())
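A usage sketch; the directory layout below is inferred from the code, and the path is a placeholder:

# config_directory/
#   global.yaml          -> global_config
#   profiles/*.yaml      -> profiles (list, one entry per file)
#   transproxies/*.yaml  -> transproxies (dict keyed by file basename)
load_config('/etc/example-app')
print(len(profiles), sorted(transproxies))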
Example #7
def main():
    parser = build_parser()
    opts = parser.parse_args()
    check_opts(opts)

    if not os.path.isdir(opts.in_path):
        if os.path.exists(opts.out_path) and os.path.isdir(opts.out_path):
            out_path = \
                    os.path.join(opts.out_path,os.path.basename(opts.in_path))
        else:
            out_path = opts.out_path

        ffwd_to_img(
            opts.in_path, out_path, opts.checkpoint_dir, device=opts.device)
    else:
        files = list_files(opts.in_path)
        full_in = [os.path.join(opts.in_path, x) for x in files]
        full_out = [os.path.join(opts.out_path, x) for x in files]
        if opts.allow_different_dimensions:
            ffwd_different_dimensions(
                full_in,
                full_out,
                opts.checkpoint_dir,
                device_t=opts.device,
                batch_size=opts.batch_size)
        else:
            ffwd(
                full_in,
                full_out,
                opts.checkpoint_dir,
                device_t=opts.device,
                batch_size=opts.batch_size)
Example #8
def main():
    parser = build_parser()
    opts = parser.parse_args()
    check_opts(opts)

    if not os.path.isdir(opts.in_path):
        if os.path.exists(opts.out_path) and os.path.isdir(opts.out_path):
            out_path = os.path.join(opts.out_path,
                                    os.path.basename(opts.in_path))
        else:
            out_path = opts.out_path

        ffwd_to_img(opts.in_path,
                    out_path,
                    opts.checkpoint_dir,
                    device=opts.device)
    else:
        files = list_files(opts.in_path)
        full_in = [os.path.join(opts.in_path, x) for x in files]
        full_out = [os.path.join(opts.out_path, x) for x in files]
        ffwd(full_in,
             full_out,
             opts.checkpoint_dir,
             device_t=opts.device,
             batch_size=opts.batch_size)
Example #9
def main():
    parser = build_parser()
    options = parser.parse_args()
    check_opts(options)

    style_target = get_img(options.style)
    content_targets = list_files(options.train_path)
    kwargs = {
            "epochs":options.epochs,
            "print_iterations":options.checkpoint_iterations,
            "batch_size":options.batch_size,
            "checkpoint_dir":os.path.join(options.checkpoint_dir,'fns.ckpt'),
            "summary_dir":options.summary_dir,
            "learning_rate":options.learning_rate
            }
    args = [
            content_targets,
            style_target,
            options.content_weight,
            options.style_weight,
            options.tv_weight,
            options.vgg_path
            ]
    start_time = time.time()
    for preds, losses, i, epoch in optimize(*args, **kwargs):
        style_loss, content_loss, tv_loss, loss = losses
        print('{0} ---------- Epoch: {1}, Iteration: {2}----------'.format(time.ctime(), epoch, i))
        print('Total loss: {0}, Style loss: {1}, Content loss: {2}, TV loss: {3}'
                .format(loss, style_loss, content_loss, tv_loss))
    print("Training complete! Total training time is {0} s".format(time.time() - start_time))
Example #10
    def test_list_files(self):
        """ Test the function list_files. """
        files = (
            os.path.join('folder_0', 'subfolder_0', 'file_0'),
            os.path.join('folder_0', 'subfolder_0', 'file_1'),
            os.path.join('folder_0', 'subfolder_1', 'file_0'),
            os.path.join('folder_0', 'subfolder_1', 'file_1'),
            os.path.join('folder_1', 'subfolder_0', 'file_0'),
            os.path.join('folder_1', 'subfolder_0', 'file_1'),
            os.path.join('folder_1', 'subfolder_1', 'file_0'),
            os.path.join('folder_1', 'subfolder_1', 'file_1'),
        )

        with tempfile.TemporaryDirectory() as tmpdirname:
            for file_ in files:
                path = os.path.join(tmpdirname, os.path.dirname(file_))

                try:
                    os.makedirs(path)
                except FileExistsError:
                    pass

                with open(os.path.join(tmpdirname, file_), 'w') as _:
                    pass

            files = [os.path.join(tmpdirname, file_) for file_ in files]
            listed_files = list_files(tmpdirname)
            self.assertEqual(sorted(listed_files), sorted(files))
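This test pins down one common contract for list_files on this page: it recurses through subdirectories and returns full paths. A minimal sketch consistent with that contract (several other examples use variants with suffix or predicate filters):

import os

def list_files(root):
    """Recursively collect the full path of every file under root.
    A sketch that satisfies the test above, not any project's actual helper."""
    found = []
    for dirpath, _dirnames, filenames in os.walk(root):
        for name in filenames:
            found.append(os.path.join(dirpath, name))
    return found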
Example #11
def main():
    parser = build_parser()
    opts = parser.parse_args()
    check_opts(opts)

    if not os.path.isdir(opts.in_path):
        if os.path.exists(opts.out_path) and os.path.isdir(opts.out_path):
            out_path = \
                    os.path.join(opts.out_path,os.path.basename(opts.in_path))
        else:
            out_path = opts.out_path

        ffwd_to_img(opts.in_path,
                    out_path,
                    opts.checkpoint_dir,
                    device=opts.device)
    else:
        files = list_files(opts.in_path)
        full_in = [os.path.join(opts.in_path, x) for x in files]
        full_out = [os.path.join(opts.out_path, x) for x in files]
        if opts.allow_different_dimensions:
            ffwd_different_dimensions(full_in,
                                      full_out,
                                      opts.checkpoint_dir,
                                      device_t=opts.device,
                                      batch_size=opts.batch_size)
        else:
            ffwd(full_in,
                 full_out,
                 opts.checkpoint_dir,
                 device_t=opts.device,
                 batch_size=opts.batch_size)
Example #12
def main():
    parser = build_parser()
    opts = parser.parse_args()
    check_opts(opts)

    if not os.path.isdir(opts.in_path):
        if os.path.exists(opts.out_path) and os.path.isdir(opts.out_path):
            out_path = os.path.join(opts.out_path,
                                    os.path.basename(opts.in_path))
        else:
            out_path = opts.out_path
        # Run style transfer on a single image: input is opts.in_path,
        # output is out_path, model file path is opts.model
        ffwd_to_img(opts.in_path, out_path, opts.model, device=opts.device)
    else:
        files = list_files(opts.in_path)
        full_in = [os.path.join(opts.in_path, x) for x in files]
        full_out = [os.path.join(opts.out_path, x) for x in files]
        if opts.allow_different_dimensions:
            ffwd_different_dimensions(full_in,
                                      full_out,
                                      opts.model,
                                      device_t=opts.device,
                                      batch_size=opts.batch_size)
        else:
            # Run style transfer in batch: inputs are full_in, outputs are
            # full_out, model file path is opts.model
            ffwd(full_in,
                 full_out,
                 opts.model,
                 device_t=opts.device,
                 batch_size=opts.batch_size)
Example #13
def main(style_im, result_im, chkpt):
    parser = build_parser()
    opts = parser.parse_args()

    opts.in_path = style_im
    opts.out_path = result_im
    opts.checkpoint = chkpt

    # check_opts(opts)

    if not os.path.isdir(style_im):
        if os.path.exists(result_im) and os.path.isdir(result_im):
            out_path = \
                    os.path.join(result_im,os.path.basename(style_im))
        else:
            out_path = result_im

        ffwd_to_img(style_im, out_path, chkpt, device=opts.device)
    else:
        files = list_files(style_im)
        full_in = [os.path.join(style_im, x) for x in files]
        full_out = [os.path.join(result_im, x) for x in files]
        if opts.allow_different_dimensions:
            ffwd_different_dimensions(full_in,
                                      full_out,
                                      chkpt,
                                      device_t=opts.device,
                                      batch_size=opts.batch_size)
        else:
            ffwd(full_in,
                 full_out,
                 chkpt,
                 device_t=opts.device,
                 batch_size=opts.batch_size)
Example #14
    def __init__(self,
                 root: str = folder,
                 train_subset: bool = True,
                 suffix: str = '.png',
                 min_num_cls: int = 5,
                 max_num_cls: int = 20,
                 k_shot: int = 20,
                 expand_dim: bool = False,
                 load_images: bool = True) -> None:
        """Initialize a data loader for Omniglot data set or a two-level dataset
            with structure similar to Omniglot: alphabet -> character -> image

        Args:
            root (str): path to the folder of Omniglot data set
            train_subset (bool): if True, this will load data from
                the ``images_background`` folder (or, training set). If False,
                it loads data from ``images_evaluation`` (or, validation set)
            suffix (str): the suffix of images
            min_num_cls (int): minimum number of classes within a generated episode
            max_num_cls (int): maximum number of classes within a generated episode
            k_shot (int): number of images sampled per class in an episode
            expand_dim (bool): if True, repeat the channel dimension from 1 to 3
            load_images (bool): if True, this will place all image data (PIL) on RAM.
                This option is optimal for small data sets since it speeds up
                the data loading process. If False, images are loaded on demand,
                which is suitable for large data sets.

        Returns: an OmniglotLoader instance
        """
        self.root = os.path.join(
            root, 'images_background' if train_subset else 'images_evaluation')
        self.suffix = suffix
        self.min_num_cls = min_num_cls
        self.max_num_cls = max_num_cls
        self.k_shot = k_shot
        self.expand_dim = expand_dim
        self.load_images = load_images

        # create a nested dictionary to store data
        self.data = dict.fromkeys(list_dir(root=self.root))
        for alphabet in self.data:
            self.data[alphabet] = dict.fromkeys(
                list_dir(root=os.path.join(self.root, alphabet)))

            # loop through each alphabet
            for character in self.data[alphabet]:
                self.data[alphabet][character] = []

                # loop through all images in an alphabet character
                for img_name in list_files(root=os.path.join(
                        self.root, alphabet, character),
                                           suffix=suffix):
                    if self.load_images:
                        # load images
                        img = _load_image(img_url=os.path.join(
                            self.root, alphabet, character, img_name),
                                          expand_dim=self.expand_dim)
                    else:
                        img = img_name

                    self.data[alphabet][character].append(img)
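A usage sketch; the docstring names the class OmniglotLoader, and the root path here is a placeholder:

loader = OmniglotLoader(root='data/omniglot', train_subset=True,
                        suffix='.png', k_shot=20, load_images=False)
# loader.data maps alphabet -> character -> list of images
# (or bare file names when load_images=False)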
Example #15
def main():
    parser = build_parser()
    opts = parser.parse_args()

    in_dir = os.path.join(opts.tmp_dir, 'in')
    out_dir = os.path.join(opts.tmp_dir, 'out')
    if not os.path.exists(in_dir):
        os.makedirs(in_dir)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    in_args = ['ffmpeg', '-i', opts.in_path, '%s/frame_%%d.png' % in_dir]

    subprocess.call(" ".join(in_args), shell=True)
    base_names = list_files(in_dir)
    in_files = [os.path.join(in_dir, x) for x in base_names]
    out_files = [os.path.join(out_dir, x) for x in base_names]
    evaluate.ffwd(in_files,
                  out_files,
                  opts.checkpoint,
                  device_t=opts.device,
                  batch_size=opts.batch_size)
    fr = 30  # hard-coded output frame rate
    out_args = [
        'ffmpeg', '-i',
        '%s/frame_%%d.png' % out_dir, '-f', 'mp4', '-q:v', '0', '-vcodec',
        'mpeg4', '-r',
        str(fr), opts.out
    ]

    subprocess.call(" ".join(out_args), shell=True)
    print('Video at: %s' % opts.out)
    shutil.rmtree(opts.tmp_dir)
Example #16
    def predict(self):
        if self.model is None:
            self.build_model()
        if not os.path.isdir(ROOT_PATH):
            os.makedirs(ROOT_PATH)

        path = TEST_PATH
        files = list_files(path)
        pix = []
        for f in files:
            pix_file = os.path.join(path, f)
            pix_data = read_gray(pix_file)
            pix.append(pix_data)
            # print(pix_file)

        pix = np.array(pix)
        print("Test image max: ", np.amax(pix))
        pix = pix / 255.0
        print("Normalized image max: ", np.amax(pix))
        ipix = np.expand_dims(pix, axis=3)

        for i in range(ipix.shape[0]):
            img = ipix[i]
            img = np.expand_dims(img, axis=0)
            out_pix = self.model.predict([img, img, img, img])
            out_pix[out_pix >= self.thresh] = 1.0
            out_pix[out_pix < self.thresh] = 0.0
            out_pix = np.squeeze(out_pix) * 255.0
            out_pix = out_pix.astype(np.uint8)
            path = os.path.join(ROOT_PATH, files[i])
            print("Saving ... ", path)
            imsave(path, out_pix, cmap='gray')
Example #17
    def generate_barplots(self, sub_directory):
        csv_files = [
            f for f in list_files(sub_directory) if f.endswith('.csv')
        ]
        fig, axPS = plt.subplots(nrows=len(csv_files),
                                 ncols=3,
                                 figsize=(90, 60))

        for i, csv_file in enumerate(csv_files):
            title = self.plot_title(csv_file)
            data = pd.read_csv(csv_file)

            low_f1 = self.make_barplot(data,
                                       axPS[i][0],
                                       y='Low_F1',
                                       title=title)
            medium_f1 = self.make_barplot(data,
                                          axPS[i][1],
                                          y='Medium_F1',
                                          title=title)
            high_f1 = self.make_barplot(data,
                                        axPS[i][2],
                                        y='High_F1',
                                        title=title)

        fig.savefig(os.path.join(sub_directory, 'plots.png'))
        plt.close(fig)
Example #18
def create_BOW(root_directory='./preprocessed_texts/'):
    """
    :type root_directory: str
    """
    training_path = os.path.join(root_directory, "training")

    training_bag_of_author = {}
    # super_counter = Counter()
    doc_count_of_author = {}

    authors = list_dirs(training_path)
    # total_doc_count = 0

    for author in authors:
        bag = Counter()

        author_path = os.path.join(training_path, author)
        files_of_author = list_files(author_path)

        for filename in files_of_author:
            file_path = os.path.join(author_path, filename)
            tokens = tokenize_file(file_path)
            bag += Counter(tokens)

        training_bag_of_author[author] = bag
        doc_count = len(files_of_author)
        doc_count_of_author[author] = doc_count
        # total_doc_count += doc_count

        # super_counter += bag

    # print(super_counter.most_common(10))
    return training_bag_of_author, doc_count_of_author
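A usage sketch, assuming the layout the code expects, preprocessed_texts/training/<author>/<documents>:

training_bags, doc_counts = create_BOW('./preprocessed_texts/')
for author in training_bags:
    print(author, doc_counts[author], training_bags[author].most_common(3))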
Example #19
    def __init__(self, mode='argument'):
        if mode not in ['argument', 'trigger', 'joint']:
            print('ERROR, wrong mode of calling FeatureVector class!')

        #get handles to all phi functions
        self.methods = inspect.getmembers(self, predicate=inspect.ismethod)
        self.mode = mode
        if self.mode == 'argument' or self.mode == 'trigger':
            self.phi_list = [method[1] for method in self.methods if 'phi_'+mode in method[0]]
        elif mode == 'joint':
            self.phi_list_arg = [method[1] for method in self.methods if 'phi_argument' in method[0]]
            self.phi_list_trig = [method[1] for method in self.methods if 'phi_trigger' in method[0]]

        #load relevant other data from presaved files.
        self.listOfAllFiles = utils.list_files()
        self.all_grammar_tags = utils.get_grammar_tag_list()
        self.trigger_list = utils.get_trigger_list()

        self.stem_list_triggers = utils.create_stem_list_trigger(cutoff=5, load=True)
        self.stem_list_arguments = utils.create_stem_list_arguments(cutoff=5, load=True)
        self.mod_list_triggers = utils.create_mod_list_trigger(cutoff=25, load=False)
        self.arguments_list = [u'None', u'Theme', u'Cause']
        
        self.dep_list_total = utils.identify_all_dep_labels(load=True)
        self.trig2arg_deps = utils.create_dep_list_trig2arg(cutoff=2, load=True)
Example #20
def main(
    original_path: str,
    executable_path: str = EXECUTABLE_PATH,
    destination_path: str = None,
    overwrite: bool = False,
):
    """ Run batch conversion process.

    Args:
        original_path: path to the dataset to convert.
        executable_path: path to the converter executable.
        destination_path: destination path.
        overwrite: if True, any existing .edf
        with the same name will be overwritten.
    """
    print('1 - List the files to convert...')
    files = [
        os.path.abspath(file_) for file_ in list_files(original_path)
        if os.path.basename(file_).lower().endswith('.eeg')
    ]

    n_files = len(files)
    print('{0} file(s) will be converted.'.format(n_files))

    print('2 - Convert files')
    for index, file_ in enumerate(sorted(files), start=1):
        # Destination file path
        if destination_path is None:
            file_destination_path = file_[:-4] + '.EDF'
        else:
            file_destination_path = os.path.join(
                destination_path, os.path.relpath(file_, original_path))

        print(
            '({0}/{1}) Convert "{2}" to "{3}"'.format(
                index,
                n_files,
                file_,
                file_destination_path,
            ), )

        ensure_path(path=os.path.dirname(file_destination_path))

        if os.path.isfile(file_destination_path) and not overwrite:
            print('File has already been converted.')
        else:
            if os.path.isfile(file_destination_path):
                print('File has already been converted (will be overwritten).')
            convert_coh3_to_edf(
                executable_path=executable_path,
                eeg_path=file_,
                edf_path=file_destination_path,
            )

    if n_files:
        print('3 - Kill the converter process(es).')
        os.system(
            'taskkill /f /im {0}'.format(
                os.path.basename(executable_path), ), )
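A hypothetical invocation; both paths are placeholders (the taskkill call implies a Windows host):

main(original_path='C:/eeg_dataset',
     destination_path='C:/eeg_dataset_edf',
     overwrite=False)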
Example #21
def create_mdtm_files(base_path, train_frac, val_frac):
    files = list(list_files(base_path, lambda x: x.endswith("cha_pickle")))
    random.shuffle(files)
    n = len(files)
    mdtm_helper("train", files[:int(train_frac * n)])
    mdtm_helper("validation",
                files[int(train_frac * n):int((train_frac + val_frac) * n)])
    mdtm_helper("test", files[int((train_frac + val_frac) * n):])
Example #22
def checkName():
    name = request.form['name']
    data = {'res': 1}

    if name in utils.list_files(baseDir):
        data['res'] = 0

    return jsonify(data)
Example #23
 def __init__(self, root):
     self.root = root
     if not posixpath.exists(posixpath.join(self.root, self.ukbench_dir)):
         download(self.root, self.filename, self.url)
         unzip(self.root, self.filename, self.ukbench_dir)
     self.uris = sorted(
         list_files(root=posixpath.join(self.root, self.ukbench_dir,
                                        'full'),
                    suffix=('png', 'jpg', 'jpeg', 'gif')))
Example #24
 def __init__(self, root):
     self.root = root
     if not posixpath.exists(posixpath.join(self.root, self.ukbench_dir)):
         download(self.url, self.root, self.filename, untar=True)
     self.uris = list_files(posixpath.join(self.root,
                                           self.ukbench_dir,
                                           'full'),
                            ('png', 'jpg', 'jpeg', 'gif'))
     self.uris.sort()
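Examples #14, #23 and #24 rely on a suffix-filtering variant of list_files. A plausible sketch follows; whether the real helpers return bare names or full paths differs between projects, and this one returns bare names, which Example #14 then joins back onto the root:

import os

def list_files(root, suffix=''):
    """Return file names directly under root that end with suffix
    (a string or a tuple of strings). A sketch, not the original helper."""
    if isinstance(suffix, str):
        suffix = (suffix,)
    return [name for name in sorted(os.listdir(root))
            if name.endswith(tuple(suffix))]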
Example #25
    def load_files(self):
        self.reset_state()
        filetypes = constants.FILETYPES[self.get_mode()]['extensions']

        self.found_files_frame.set_files(
            utils.list_files(self.get_sd_card_root(), filetypes), filetypes)
        self.update_options()

        self.load_playlists()
Example #26
def _get_files_and_labels(img_dir):
    files = utils.list_files(img_dir)
    labels = [CLASS_MAP[x.split('_')[2][:-4]] for x in files]
    files, labels = utils.resample_unbalanced_data(files, labels)
    unique, counts = np.unique(labels, return_counts=True)
    print('{} labels have counts: {}'.format(unique, counts))

    return [os.path.join(img_dir, x)
            for x in files], [utils.to_one_hot(x, 3) for x in labels]
Example #27
def get_in_pix(filename="in_pix.npy", ispix=True, isskel=False, istest=False):
    path = PX_PATH
    if istest:
        path = PT_PATH
    if isskel:
        path = SK_PATH
    if not ispix:
        path = path.replace("pixel", "point")
    files = list_files(path)
    pix = []
    pmax = 0
    pmin = 255
    maxpts = 0
    for f in files:
        pix_file = os.path.join(path, f)
        print(pix_file)
        if ispix:
            pix_data = read_gray(pix_file)
        else:
            image = np.zeros((256, 256), dtype=np.uint8)
            pix_data = read_points(pix_file)
            if len(pix_data) > maxpts:
                maxpts = len(pix_data)
            for p in pix_data:
                if p[0] > pmax:
                    pmax = p[0]
                if p[0] < pmin:
                    pmin = p[0]
                if p[1] > pmax:
                    pmax = p[1]
                if p[1] < pmin:
                    pmin = p[1]
                x = min(round(p[0]), 255)
                y = min(round(p[1]), 255)
                image[x][y] = 255
            impath = os.path.join("tmp", f + ".png")
            print("Saving ... ", impath)
            imsave(impath, image, cmap='gray')
            pix_data = image

        pix.append(pix_data)

    # Max pts:  12270
    print("Max pts: ", maxpts)
    pix = np.array(pix)
    print("Shape: ", pix.shape)
    print("PMin: ", pmin)
    print("PMax: ", pmax)
    if not istest:
        pix = np.expand_dims(pix, axis=3)
    print("Final shape: ", pix.shape)
    print("Min: ", np.amin(pix))
    print("Max: ", np.amax(pix))
    if not istest:
        print("Saving to ", filename)
        np.save(filename, pix)
    return pix
Example #28
    def load_data_from_directory(self, directory):
        data = {}
        for filename, extension, filepath in list_files(directory):
            extractor = self.get_extractor_for_extension(extension)

            if extractor:
                data[filename] = extractor.extract(filepath)

        return data
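Example #28 unpacks yet another contract: list_files yields (name, extension, path) triples. A sketch under that assumption (the snippet does not show whether the extension keeps its leading dot):

import os

def list_files(directory):
    """Yield (base name, extension, full path) for every file under
    directory; a sketch of the triple contract used above."""
    for dirpath, _dirnames, filenames in os.walk(directory):
        for name in filenames:
            base, ext = os.path.splitext(name)
            yield base, ext, os.path.join(dirpath, name)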
Example #29
def _get_files(dir):
    """
        Construct dir file list.
        Arguments:
            'dir' - directory for searching
        Returns:
            'files' - array of full file names
    """
    files = list_files(dir)
    return [os.path.join(dir, x) for x in files]
Example #30
def copy_supporting_files(start_path, destination):
    for file in list_files(start_path):
        if not (file.startswith("_") or file.startswith(".")):
            print("copying: %s to: %s" % (file, destination))
            copy_file(path.join(start_path, file), path.join(destination, file))

    for dir in list_dirs(start_path):
        if not (dir.startswith("_") or dir.startswith(".")):
            print("copying: %s to: %s" % (dir, destination))
            copy_tree(path.join(start_path, dir), path.join(destination, dir))
Example #31
def copy_folder(src, dst, recursive, link):
    logger = logging.getLogger(__name__)

    svc = utils.get_gdrive_service(SCOPES=SCOPES)
    print("Source: ", utils.full_path(svc, src, tpath=[]))
    print("Destn:  ", utils.full_path(svc, dst, tpath=[]))

    for fileinfo in utils.iterfiles(svc, parent=src):
        if dst not in fileinfo['parents']:
            if link:
                update_result = svc.files().update(
                    fileId=fileinfo['id'],
                    addParents=dst,
                    fields='id, parents'
                    ).execute()
                print("update result ", update_result)
            else:
                raise NotImplementedError("TODO")
    utils.list_files(utils.iterfiles(svc, parent=src))
Example #32
def find_files(start_path, extensions=[".txt"]):
    """
    digs through a directory looking for text files and directories
    that don't start in . or _
    """
    for file in list_files(start_path):
        name, ext = path.splitext(file)
        if not (file.startswith("_") or file.startswith(".")) and (ext in extensions):
            filename = path.join(start_path, file)
            yield filename
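A usage sketch for the generator above; the directory is a placeholder:

for filename in find_files('notes/', extensions=['.txt', '.md']):
    print(filename)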
Example #33
def main(path: str, destination_path: str, use_folder_as_name: bool = True):
    """Main process.

    Args:
        path: path to the dataset.
        destination_path: destination path to the anonymised dataset.
        If not set, the files will be overwritten.
        use_folder_as_name: fill the name field with the name of the parent
        folder.
    """
    files_in_dataset = [
        eeg for eeg in list_files(path) if eeg.lower().endswith('.eeg')
    ]

    def folder_name(path):
        return os.path.basename(os.path.dirname(path))

    number_of_files = len(files_in_dataset)

    for file_index, file_ in enumerate(sorted(files_in_dataset), start=1):
        # Set name as the parent folder's name or as an empty field
        if use_folder_as_name:
            field_name = folder_name(file_)
        else:
            field_name = ''

        # Destination file path
        if destination_path is None:
            file_path = file_
        else:
            file_path = os.path.join(
                destination_path,
                os.path.relpath(file_, path)
            )

        print(
            '\nCurrent file ({0}/{1}):'.format(file_index, number_of_files),
            file_,
            '-->',
            file_path,
        )

        try:
            anonymise_eeg_verbose(
                file_, file_path, field_name=field_name, verbose=True
            )
        except MemoryError:
            print('MemoryError: retry...')
            try:
                anonymise_eeg_verbose(
                    file_, file_path, field_name=field_name, verbose=True
                )
            except MemoryError:
                print('MemoryError: not able to process the file')
                traceback.print_exc()
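A hypothetical call; both paths are placeholders:

main(path='data/eeg_raw',
     destination_path='data/eeg_anonymised',
     use_folder_as_name=True)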
Example #34
def calc_metrics(ds_path):
    file_paths = utils.list_files(ds_path, BTS_SUBDIR, BTS_DATA_SUBDIR)
    max_depth = 0
    for file_path, file_name in file_paths:
        depth_image = Image.open(file_path).convert('I')
        depth = np.asarray(depth_image, np.int32)
        depth = depth.astype(np.float32) / 1000
        m = depth.max()
        print(file_name, m)
        max_depth = max(max_depth, m)
    print('---- Max depth:', max_depth)
Example #35
 def lp_layout_list(self, upstream=None):
     """
     search_order : list layouts from upstream if mentioned
                    list layouts from core package
     """
     if upstream is None:
         l_files = list_files(self.base_path + "/inventory_layouts")
         return l_files
     else:
         g = GitHub(upstream)
         files = g.list_files("inventory_layouts")
         return files
Example #36
 def lp_topo_list(self, upstream=None):
     """
     search_order : list topologies from upstream if mentioned
                    list topologies from current folder
     """
     if upstream is None:
         t_files = list_files(self.base_path + "/ex_topo")
         return t_files
     else:
         print "getting from upstream"
         g = GitHub(upstream)
         files = g.list_files("ex_topo")
         return files
Example #37
def main():
    parser = build_parser()
    opts = parser.parse_args()

    if opts.no_disk:
        evaluate.from_pipe(opts)
    else:
        in_dir = os.path.join(opts.tmp_dir, 'in')
        out_dir = os.path.join(opts.tmp_dir, 'out')
        if not os.path.exists(in_dir):
            os.makedirs(in_dir)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        in_args = [
            'ffmpeg',
            '-i', opts.in_path,
            '%s/frame_%%d.png' % in_dir
        ]

        subprocess.call(" ".join(in_args), shell=True)
        base_names = list_files(in_dir)
        in_files = list(map(lambda x: os.path.join(in_dir, x), base_names))
        out_files = list(map(lambda x: os.path.join(out_dir, x), base_names))
        evaluate.ffwd(in_files, out_files, opts.checkpoint, device_t=opts.device,
                      batch_size=opts.batch_size)
        fr = 30 # wtf
        out_args = [
            'ffmpeg',
            '-i', '%s/frame_%%d.png' % out_dir,
            '-f', 'mp4',
            '-q:v', '0',
            '-vcodec', 'mpeg4',
            '-r', str(fr),
            opts.out
        ]

        subprocess.call(" ".join(out_args), shell=True)
        print('Video at: %s' % opts.out)
        shutil.rmtree(opts.tmp_dir)
Example #38
def calculate_confusion_matrix(training_bags, doc_counts,
                               output_path='./preprocessed_texts/'):
    authors = list(training_bags.keys())
    confusion_matrix = np.zeros([len(authors), len(authors)], dtype=int)

    test_path = os.path.join(output_path, "test")

    for i, author in enumerate(authors):
        # bag = Counter()
        author_path = os.path.join(test_path, author)
        files_of_author = list_files(author_path)

        for filename in files_of_author:
            file_path = os.path.join(author_path, filename)
            tokens = tokenize_file(file_path)
            author_candidates = calculate_probability_of_author(
                tokens=tokens,
                training_bags=training_bags,
                doc_counts=doc_counts)
            candidate_index = authors.index(author_candidates[0][0])
            confusion_matrix[i, candidate_index] += 1
    # print(confusion)
    return confusion_matrix
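A usage sketch pairing this with Example #18's create_BOW; the path is the shared default:

bags, counts = create_BOW('./preprocessed_texts/')
cm = calculate_confusion_matrix(bags, counts, output_path='./preprocessed_texts/')
print('accuracy:', cm.trace() / cm.sum())  # diagonal counts correct attributions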
Example #39
def _get_files(img_dir):
    files = list_files(img_dir)
    return [os.path.join(img_dir, x) for x in files]
Example #40
def main():

	################### EXPLORATORY DATA ANALYSIS #############################

	# Just testing my functions a bit
	list_of_files = utils.list_files()
	print(list_of_files[0])
	f1 = utils.load_json_file(list_of_files[0])
	pprint(len(f1['sentences']))
	    
	# Finding and counting all event triggers
	t = utils.get_all_triggers(list_of_files)
	print("Number of distinct event triggers: {0}".format(len(t.keys())))
	pprint(t)

	# Finding and counting all possible arguments (=relationship labels)
	arg = utils.get_all_arguments(list_of_files)
	print("Number of relation arguments: {0}".format(len(arg.keys())))
	pprint(arg)

	########################## NAIVE BAYES ####################################

	# Crossvalidation
	rates = [0.5,0.6,0.7,0.8,0.9,0.95]
	# x = crossvalidation_experiment(rates, list_of_files, load=True, mode='trig', k=3)
	# pprint(x)

	# x2 = crossvalidation_experiment(rates, list_of_files, load=True, mode='arg', k=3)
	# pprint(x2)

	## Naive Bayes on trigger
	# Read data
	print "Experiment 1: Naive Bayes predicting triggers"
	FV_trig = feature_vector.FeatureVector('trigger')
	train_list, valid_list = utils.create_training_and_validation_file_lists(list_of_files)

	X_train, y_train = build_dataset(train_list, FV_trig, ind=1, kind='train', mode='trig', clf='nb', load=True)
	X_train, y_train = subsample(X_train, y_train, clf='nb', subsampling_rate=0.50)
	X_valid, y_valid = build_dataset(valid_list, FV_trig, ind=1, kind='valid', mode='trig', clf='nb', load=True)

	NB_trig = nb.NaiveBayes()
	NB_trig.train(np.asarray(X_train.todense()),np.asarray(y_train))

	# print "Evaluate Naive Bayes classifer predicting triggers on the train set..."
	# CM, prec, rec, F1 = NB_trig.evaluate(np.asarray(X_train.todense()), np.asarray(y_train))
	# print "Precision: {0}".format(prec)
	# print "Recall: {0}".format(rec)
	# print "F1-measure: {0}".format(F1)
	# print "Confusion matrix:\n", np.int64(CM)

	print "Evaluate Naive Bayes classifer predicting triggers on the validation set..."
	CM, prec, rec, F1 = NB_trig.evaluate(np.asarray(X_valid.todense()), np.asarray(y_valid))
	print "Precision: {0}".format(prec)
	print "Recall: {0}".format(rec)
	print "F1-measure: {0}".format(F1)
	print "Confusion matrix:\n", np.int64(CM)

	## Naive Bayes on argument

	print "Experiment 2: Naive Bayes predicting arguments"
	FV_arg = feature_vector.FeatureVector('argument')

	X_train, y_train = build_dataset(train_list, FV_arg, ind=1, kind='train', mode='arg', clf='nb', load=True)
	X_train, y_train = subsample(X_train, y_train, clf='nb', subsampling_rate=0.50)
	X_valid, y_valid = build_dataset(valid_list, FV_arg, ind=1, kind='valid', mode='arg', clf='nb', load=True)

	NB_arg = nb.NaiveBayes()
	NB_arg.train(np.asarray(X_train.todense()), np.asarray(y_train))

	# print "Evaluate Naive Bayes classifer predicting arguments on the train set..."
	# CM, prec, rec, F1 = NB_arg.evaluate(np.asarray(X_train.todense()), np.asarray(y_train))
	# print "Precision: {0}".format(prec)
	# print "Recall: {0}".format(rec)
	# print "F1-measure: {0}".format(F1)
	# print "Confusion matrix:\n", np.int64(CM)

	print "Evaluate Naive Bayes classifer predicting arguments on the validation set..."
	CM, prec, rec, F1 = NB_arg.evaluate(np.asarray(X_valid.todense()), np.asarray(y_valid))
	print "Precision: {0}".format(prec)
	print "Recall: {0}".format(rec)
	print "F1-measure: {0}".format(F1)
	print "Confusion matrix:\n", np.int64(CM)
Example #41
import os
import json
import pickle  # the original Python 2 code used cPickle

import perceptron_sketch as perc
import feature_vector
import utils


#for classification of error types:
#test_files_list =['C:/Python27/aaa_UCL/Natural Language Processing/assignment2/PMID-1653950.json']


#for running the test data:
test_path_inputs ='C:/Python27/aaa_UCL/Natural Language Processing/assignment2/bionlp2011genia-statnlp-test-clean/*.json'
test_files_output_dir = 'C:/Python27/aaa_UCL/Natural Language Processing/assignment2/predictions/'

test_files_list = utils.list_files(path=test_path_inputs)
if not os.path.exists(test_files_output_dir):
    os.makedirs(test_files_output_dir)

    
evaluate_test_list = test_files_list
FV_arg = feature_vector.FeatureVector('argument')
FV_trig = feature_vector.FeatureVector('trigger')

#load weights of pretrained perceptron. 
with open('Perceptron_trigger.data', 'rb') as f:
    Lambda_e, misc_e = pickle.load(f)
with open('Perceptron_argument.data', 'rb') as f:
    Lambda_a, misc_a = pickle.load(f)
    
   
Example #42
def _get_files(img_dir):
    files = list_files(img_dir)
    return map(lambda x: os.path.join(img_dir,x), files)
Example #43
import sys
sys.path.append(sys.path[0] + '/pysrc')
import os
from utils import print_msg,get_filename,dump_csv,classify_symms_args,list_files,dump_typified_symm_file2,dump_typified_symm_file
import stats
import parser as myparser
import analyzer

parser = classify_symms_args()

# parse command line arguments
args = parser.parse_args()

problems = []
if args.symmfolder is not None:
    problems = list_files(args.symmfolder, '.symm')
else:
    parser.error('You must provide a folder to process.')

print_msg('===========================================')
print_msg('Starting classify_symms.py')
print_msg('===========================================')
print_msg('Variable File: %s' % args.varfile)
print_msg('Input Folder: %s' % args.symmfolder)
print_msg('Output Folder: %s' % args.folder)
print_msg('Human Readable: %s' % (not args.computer))
print_msg('-------------------------------------------', args.silent)
print_msg('%s files were found' % len(problems), args.silent)

print_msg('Loading variables file: %s' % args.varfile, args.silent)
vars_t = analyzer.load_vars_file(args.varfile)