Example 1
def parse_raw_emdr_data(data_folder):

    filenames = utils.get_filenames(data_folder)

    total_orders = []
    total_history = []
    for f in filenames:
        try:
            j = json.loads(utils.read_json(f, data_folder))
            rowsets = j['rowsets']
            resultType = j['resultType']

            if resultType=='orders':
                for rowset in rowsets:
                    for row in rowset['rows']:
                        total_orders.append(row)

            elif resultType=='history':
                for rowset in rowsets:
                    typeID = rowset['typeID']
                    regionID = rowset['regionID']
                    for row in rowset['rows']:
                        row.append(typeID)
                        row.append(regionID)
                        total_history.append(row)

            else:
                print '[x] Result type is not orders or history.'

        except Exception as e:
            print 'Filename: ' + f
            print e

    return total_orders, total_history
Example 2
def get_full_light_curve(kic):
    light_curve = utils.pd.DataFrame()
    filenames = utils.get_filenames(utils.BASE_PATH + str(kic), "csv")
    for filename in filenames:        
        data = utils.pd.read_csv(utils.BASE_PATH + str(kic) + "/" + filename)    
        light_curve = light_curve.append(data)
    return light_curve
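Side note: DataFrame.append, used above, was deprecated in pandas 1.4 and removed in 2.0. Below is a minimal sketch of the same concatenation with pd.concat; the function name is illustrative, and utils.get_filenames / utils.BASE_PATH are assumed to behave exactly as in the example.

import pandas as pd

def get_full_light_curve_concat(kic):
    # Build the frame in one pass instead of appending block by block;
    # utils.get_filenames and utils.BASE_PATH are assumed to be the same
    # helpers used in the example above.
    filenames = utils.get_filenames(utils.BASE_PATH + str(kic), "csv")
    frames = [pd.read_csv(utils.BASE_PATH + str(kic) + "/" + f) for f in filenames]
    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()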
Example 3
 def load_widget_type_listbox(self):
     self.widget_type_filenames = utils.get_filenames("templates")
     widget_types = [
         filename.replace(".py", "")
         for filename in self.widget_type_filenames
     ]
     self.load_listbox(self.widget_type_listbox, widget_types)
Example 4
def get_data_frames(kic):
    df_list = []
    filenames = utils.get_filenames(utils.BASE_PATH + str(kic), "csv")
    for filename in filenames:        
        data = utils.pd.read_csv(utils.BASE_PATH + str(kic) + "/" + filename)
        df_list.append(data)
    return df_list
Example 5
def read_csv(kic):
    periods = []
    df_list = []
    filenames = utils.get_filenames(utils.BASE_PATH + str(kic), "csv")

    if len(filenames) <= 5:
        return {"df_list": df_list, "period": 0.0}
    for idx, filename in enumerate(filenames):
        if idx > 2:
            data = utils.pd.read_csv(utils.BASE_PATH + str(kic) + "/" +
                                     filename)
            try:
                res = utils.get_signal_parameters(data.dropna().TIME,
                                                  data.dropna().PDC_NORM_FILT)
                periods.append(res["period"])
            except Exception as e:
                print(e)
                print(idx)
                print(kic)

            df_list.append(data)

    df = utils.pd.DataFrame()
    for _df in df_list:
        df = df.append(_df)

    period = utils.get_period(df.TIME, df.PDC_NORM_FILT, df.EFPDC, periods)

    return {"df_list": df_list, "period": period}
Example 6
def generate(ctx, **kwargs):
    """Generate synthetic images of SEM scanned surfaces.

    Output format will be TIFF, and generated images will have sequential
    names.
    """
    if (kwargs['use_params'] is not None and not os.path.exists(
            os.path.join(kwargs['use_params'], 'params.json'))):
        raise click.UsageError(
            "params.json not found in directory '{0}'.".format(
                ctx.obj['use_params']),
            ctx=ctx)

    ctx.obj['dst_path'] = kwargs['destination_dir']
    ctx.obj['image_dim'] = kwargs['dim']
    ctx.obj['image_n'] = kwargs['number']
    ctx.obj['to_write'] = utils.get_filenames(ctx.obj['dst_path'],
                                              ctx.obj['image_n'],
                                              overwrite=kwargs['overwrite'])
    ctx.obj['log_params'] = kwargs['log_params']
    ctx.obj['use_params'] = kwargs['use_params']
    ctx.obj['overwrite'] = kwargs['overwrite']

    logging.info("Enqueued {0:d} images.".format(ctx.obj['image_n']))
    logging.info("Images will be generated starting from index: '{0}'".format(
        click.format_filename(os.path.basename(ctx.obj['to_write'][0]))))
Example 7
 def get_widgets(self):
     return sorted([
         widget for widget in [
             self.server.get_widget(widget_id=filename.replace(".py", ""))
             for filename in utils.get_filenames("widgets")
         ] if widget is not None
     ],
                   key=lambda x: x.name)
Example 8
 def load_filenames(self, path):
     """Loads filenames of tracks from working directory"""
     self.notify(
         'Loading files from {path}...'.format(path=path),
     )
     self.filenames = utils.shuffle(
         utils.get_filenames(path),
         seed=self._seed,
     )
     self.notify('{count} files loaded.'.format(count=len(self.filenames)))
Example 9
def predict_input_fn(root, img_size, batch_size, buffer_size):
    filenames = get_filenames(root, shuffle=False, is_test=True)
    dataset = input_fn_helper(filenames, img_size, is_test=True)
    dataset = dataset.prefetch(buffer_size).batch(batch_size)
    dataset = dataset.repeat(1)

    images_x = dataset.make_one_shot_iterator().get_next()

    features_dic = {'image': images_x}
    return features_dic
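Side note: make_one_shot_iterator is a TF 1.x graph-mode API. The sketch below shows how the same pipeline could be consumed eagerly under TF 2.x; it is not a drop-in replacement for the Estimator-style input_fn above, and it assumes get_filenames and input_fn_helper behave as in the example.

def predict_batches(root, img_size, batch_size, buffer_size):
    # Hypothetical TF 2.x consumption: a tf.data.Dataset is a Python iterable,
    # so batches can be yielded directly instead of via an iterator op.
    filenames = get_filenames(root, shuffle=False, is_test=True)
    dataset = input_fn_helper(filenames, img_size, is_test=True)
    dataset = dataset.prefetch(buffer_size).batch(batch_size).repeat(1)
    for images_x in dataset:
        yield {'image': images_x}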
Example 10
def plot_some_imgs(root):
    print('Displaying 100 images')

    filenames = get_filenames(root)
    indices = np.random.choice(len(filenames), size=100)
    plt.figure(figsize=(10, 10))
    for i, index in enumerate(indices):
        plt.subplot(10, 10, i+1)
        plt.imshow(cv2.cvtColor(cv2.imread(filenames[index]), cv2.COLOR_BGR2RGB))
        plt.axis('off')
    plt.tight_layout()
    plt.subplots_adjust(wspace=0, hspace=0)
    plt.show()
Example 11
 def handle_code_updated(self):
     widgets = [
         widget for widget in [
             self.get_widget(widget_id=filename.replace(".py", ""))
             for filename in utils.get_filenames("widgets")
         ] if widget is not None
     ]
     for widget in widgets:
         with open(f"widgets/{widget.id}.py") as f:
             code = f.read()
             deps = [
                 tmp.id for tmp in widgets if f"widgets.{tmp.name}" in code
             ]
             widget.update(dependencies=deps)
Example 12
def get_period(kic):
    frequencies = []
    df_list = []
    filenames = utils.get_filenames(utils.BASE_PATH + str(kic), "csv")
    if len(filenames) <= 1:
        return {"period": 0.0, "fap": 0.0, "theta": 0.0, "periods": []}
 
    for idx, filename in enumerate(filenames):
        if (idx > 2):
            data = utils.pd.read_csv(utils.BASE_PATH + str(kic) + "/" + filename)
            try:
                freq = utils.get_freq_LS(data.TIME.to_numpy(),data.PDCSAP_FLUX.to_numpy(),data.EFPDC.to_numpy())
                frequencies.append(freq)
            except Exception as e:
                print(e)
                print(idx)
                print(kic)

            df_list.append(data)
    
    df = utils.pd.DataFrame()
    for _df in df_list:
        df = df.append(_df)        
          
    t = df.TIME.to_numpy()
    y = df.FPDC.to_numpy()
    dy = df.EFPDC.to_numpy()
    
    period1 = utils.get_period(t, y, dy, frequencies)
    period2 = utils.get_period(t, y, dy)    
    
    periods = [period1, period2]
    nbins = 3
    
    if period2 < 0.09 or period2 > 100:
        period = period1
        theta = None
    else:
        try:  
            period, theta = utils.get_period_pdm(t, y, dy, periods, nbins)
        except:
            period = utils.median(periods) 
            theta = None   
    
    df = None
    data = None
    df_list = []
    return {"period": period, "theta": theta, "periods": periods}
Example 13
def val_data_feed(data_dict=None):
    data_dir = data_dict['data_dir']
    img_ext_lists = data_dict['img_ext_lists']
    label_ext = data_dict['label_ext']
    flatten = data_dict['flatten']
    img_name_list, label_name_list = \
        get_filenames([data_dir], img_ext_lists, label_ext)
    data_size = len(img_name_list)
    img_list = []
    label_list = []
    for index in range(data_size):
        img = cv2.imread(img_name_list[index])
        label = load_label(label_name_list[index], flatten)
        img_list.append(img)
        label_list.append(label)
    return np.array(img_list), np.array(label_list)
Example 14
def main():
    # Get connection params
    python_file, host, port = sys.argv
    port = int(port)

    # Get filenames
    filenames = get_filenames("data")
    if filenames == []:
        logging.error("Directory with data is empty. Exit program.")
        print("Directory with data is empty. Exit program.")
        return

    # Start connection (else: exit script)
    client = create_connection(host=host, port=port)
    if client is None:
        logging.error("Connection failed. Exit program.")
        print("Connection failed. Exit program.")
        return

    # db instance (creating or getting)
    db = client[main_doc_name]

    # Collection instances (creating or getting)
    main_table = db[main_table_name]
    helper_table = db[helper_table_name]

    # Insert data
    client, bool_inserted = insert_data_mongo(client, main_table, helper_table,
                                              filenames)
    if not bool_inserted:
        if client is not None:
            client.close()
        logging.error("Data insertion error. Exit Program.")
        print("Data insertion error. Exit Program.")
        return

    # Query data and add to .csv
    db_query(main_table)

    # Drop collection
    main_table.drop()

    # End connection
    client.close()
Example 15
def test_suite(size='50k', method='upload', provider='cos'):

    methods = {
        'oss': {
            'upload': upload_oss,
            'download': download_oss,
            'delete': delete_oss,
        },
        'cos': {
            'upload': upload_cos,
            'download': download_cos,
            'delete': delete_cos,
        }
    }

    data = {
        provider: [],
    }

    filenames = get_filenames('%s' % size)
    num = 0

    for file in filenames:
        elapsed = methods[provider][method](file)
        if elapsed != 0:
            data[provider].append(elapsed)
            num += 1

    for item in data:
        total = float(sum(data[item]))
        avg = total / max(num, 1)
        print '%s: total %s for %s files is:  %0.3f ms' % (item, method, size,
                                                           total)
        print '%s: average %s for %s files is:  %0.3f ms' % (item, method,
                                                             size, avg)

    csv_file = 'test_%s_%s_%s.csv' % (size, method, provider)
    write_to_csv(data, csv_file)

    final_csv = 'final_%s_%s_%s.csv' % (size, method, provider)
    with open(final_csv, 'w') as f:
        w = csv.writer(f)
        w.writerow(['平均值', '总数'])
        w.writerow([avg, total])
Example 16
def pipe_train_val(data_dir, img_ext_lists=None):
    """Pipeline of saving imgs in train and val dir"""
    img_name_list, label_name_list = \
        get_filenames(data_dir=data_dir, img_ext_lists=img_ext_lists, label_ext=label_ext)

    count = 0
    time_total = 0.0
    for img_path, label_path in zip(img_name_list, label_name_list):
        img = cv2.imread(img_path)
        landmark = np.genfromtxt(label_path)
        prediction, occlu_ratio, time_pass = get_pipe_data(img)
        delta = np.concatenate((landmark, prediction, occlu_ratio))
        np.savetxt(os.path.splitext(img_path)[0] + '.wdpts',
                   delta,
                   fmt='%.10f')
        count += 1
        if data_param['print_debug'] and count % 500 == 0:
            logger('saved {} wdpts'.format(count))
        time_total += time_pass
    logger("average speed for processing is {} fps".format(
        float(count) / time_total))
Example 17
def main():
    args = engine.parser.parse_args()
    filenames = utils.get_filenames(args.input)
    files_exist = len(filenames) != 0
    stopwords_exist = os.path.isfile(args.stopwords)
    if files_exist and stopwords_exist:
        used_tokenizer = engine.tokenizers[args.tokenizer]
        if used_tokenizer.has_rule(rules.stopping):
            used_tokenizer.make_rule(rules.stopping, args.stopwords)
        values = ['store_positions', 'calculate_tfidf']
        combinations = [{
            key: value
            for key, value in zip(values, option)
        } for option in product([True, False], repeat=len(values))]
        for combination in combinations:
            (indexer,
             max_memory), interval = utils.timeit(utils.profileit,
                                                  engine.indexit,
                                                  used_tokenizer,
                                                  filenames,
                                                  memory_usage=args.memory,
                                                  **combination)
            indexer.save(args.output)
            print('Answers({}):'.format(', '.join([
                '{} = {}'.format(key, value)
                for key, value in combination.items()
            ])))
            print('Time taken: {}s'.format(interval))
            print('Max memory usage: {}'.format(utils.sizeof_fmt(max_memory)))
            print('Disk size: {}'.format(
                utils.sizeof_fmt(os.path.getsize(args.output))))
            indexer.dispose()
            del indexer
    else:
        if not files_exist:
            print(
                'Error: File or directory (with files) to index doesn\'t exist!'
            )
        if not stopwords_exist:
            print('Error: Stopwords\' file doesn\'t exist!')
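For clarity, the combinations comprehension in the example above simply enumerates every on/off setting of the two indexer flags; a standalone illustration of what it produces:

from itertools import product

values = ['store_positions', 'calculate_tfidf']
combinations = [dict(zip(values, option))
                for option in product([True, False], repeat=len(values))]
# combinations == [
#     {'store_positions': True,  'calculate_tfidf': True},
#     {'store_positions': True,  'calculate_tfidf': False},
#     {'store_positions': False, 'calculate_tfidf': True},
#     {'store_positions': False, 'calculate_tfidf': False},
# ]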
Example 18
def main():
    parser.add_argument(
        '--store_positions',
        action='store_true',
        help='Indicates if indexer stores positions of terms or not')
    parser.add_argument('--tfidf',
                        action='store_true',
                        help='Indicates if program calculates tfidf or not')
    args = parser.parse_args()
    filenames = utils.get_filenames(args.input)
    files_exist = len(filenames) != 0
    stopwords_exist = os.path.isfile(args.stopwords)
    if files_exist and stopwords_exist:
        used_tokenizer = tokenizers[args.tokenizer]
        if used_tokenizer.has_rule(rules.stopping):
            used_tokenizer.make_rule(rules.stopping, args.stopwords)
        (index, max_memory), interval = utils.timeit(
            utils.profileit,
            indexit,
            used_tokenizer,
            filenames,
            store_positions=args.store_positions,
            calculate_tfidf=args.tfidf,
            memory_usage=args.memory)
        index.save(args.output)
        print('Answers:')
        print('Time taken: {}s'.format(interval))
        print('Max memory usage: {}'.format(utils.sizeof_fmt(max_memory)))
        print('Disk size: {}'.format(
            utils.sizeof_fmt(os.path.getsize('{}.csv'.format(args.output)))))
        shutil.rmtree('index')
    else:
        if not files_exist:
            print(
                'Error: File or directory (with files) to index doesn\'t exist!'
            )
        if not stopwords_exist:
            print('Error: Stopwords\' file doesn\'t exist!')
Example 19
def train_data_feed(batch_size, data_dict=None):
    """Train data feed.

    :param: batch_size:
    :param: data_dir:
    :param: img_ext_lists: img suffix lists.
    :param: label_ext: label suffix.
    :param: mean_shape:
    :param: print_debug:
    """
    data_dir = data_dict['data_dir']
    img_ext_lists = data_dict['img_ext_lists']
    label_ext = data_dict['label_ext']
    flatten = data_dict['flatten']
    img_name_list, label_name_list = \
        get_filenames([data_dir], img_ext_lists, label_ext)
    data_size = len(img_name_list)
    batch_offset = 0
    indices = [_ for _ in range(data_size)]
    while True:
        start = batch_offset
        batch_offset += batch_size
        if batch_offset > data_size:
            np.random.shuffle(indices)
            start = 0
            batch_offset = batch_size
        end = batch_offset
        chosen_indices = indices[start:end]
        img_list = []
        label_list = []
        for index in chosen_indices:
            img = cv2.imread(img_name_list[index])
            label = load_label(label_name_list[index], flatten)
            img_list.append(img)
            label_list.append(label)

        yield np.array(img_list), np.array(label_list)
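A minimal usage sketch for this generator; the directory layout, extensions, and step counts below are hypothetical, and model is assumed to be an already compiled tf.keras model.

data_dict = {
    "data_dir": "data/train",            # hypothetical directory
    "img_ext_lists": [".jpg", ".png"],   # hypothetical extensions
    "label_ext": ".pts",
    "flatten": True,
}
train_gen = train_data_feed(batch_size=32, data_dict=data_dict)
# steps_per_epoch bounds the otherwise infinite generator.
model.fit(train_gen, steps_per_epoch=100, epochs=10)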
Example 20
    def __init__(self, server):
        self.server = server
        master = tk.Tk()

        self.widget_filenames = []
        self.widget_type_filenames = []
        self.selected_widget_index: Optional[int] = None

        master.title("Prototyping")

        tk.Label(master,
                 text="Welcome to our Prototyping System!").grid(row=0,
                                                                 column=0,
                                                                 columnspan=2)

        # top left area
        left = tk.Frame(master)
        left.grid(row=1, column=0, sticky="nsew")

        tk.Label(left, text="Widgets:").pack()

        self.widget_listbox = tk.Listbox(left, exportselection=False)
        self.widget_listbox.pack()
        self.load_widgets_listbox()
        self.widget_listbox.bind("<<ListboxSelect>>", self.on_widget_selected)

        widget_types = [
            filename.replace(".py", "")
            for filename in utils.get_filenames("templates")
        ]
        self.selected_widget_type = tk.Spinbox(left,
                                               values=widget_types,
                                               wrap=True)
        self.selected_widget_type.pack(side=tk.TOP)

        self.selected_widget_photo_id = tk.Spinbox(left, from_=0, to_=11)
        self.selected_widget_photo_id.pack(side=tk.TOP)

        # Start these as hidden
        self.selected_widget_type.pack_forget()
        self.selected_widget_photo_id.pack_forget()

        # bottom left area

        divider = tk.Frame(left, height=1, bg="black")

        new_widget_label = tk.Label(left, text="Add New Widget:")

        self.new_widget_name = tk.Entry(left)
        self.widget_type_listbox = tk.Listbox(left)
        self.load_widget_type_listbox()
        self.new_widget_photo_id_picker = tk.Spinbox(left, from_=0, to_=11)

        add_button = tk.Button(left,
                               text="Add",
                               command=self.add_widget_pressed)

        add_button.pack(side=tk.BOTTOM)
        self.new_widget_photo_id_picker.pack(side=tk.BOTTOM)
        self.widget_type_listbox.pack(side=tk.BOTTOM)
        self.new_widget_name.pack(side=tk.BOTTOM)
        new_widget_label.pack(side=tk.BOTTOM)
        divider.pack(side=tk.BOTTOM, fill="x")

        # right area
        right = tk.Frame(master)
        right.grid(row=1, column=1)

        tk.Label(right, text="Code:").pack()

        self.code_text = ScrolledText(right,
                                      borderwidth=1,
                                      relief="solid",
                                      width=100,
                                      height=40)
        self.code_text.pack()

        tk.Button(right, text="Save", command=self.save_pressed).pack()
        tk.Button(right, text="Delete", command=self.delete_pressed).pack()

        master.grid_rowconfigure(1, weight=1)
        master.grid_columnconfigure(1, weight=1)

        # React to changes from the systems in firebase
        self.server.systems.on_snapshot(self.on_systems_change)

        master.mainloop()
Example 21
def cluster_gen(directory, clusters, data, raw_data, generator_network,
                isShown=False, rafd_kid=True, alpha=1,
                isAdjustWeight=False,
                isBackward=True,
                isRandWeight=False,
                weight_bound=[0.6, 1],
                k=100,
                ):
    '''
    Inputs for this code block:
    - data
    - raw_data for calling 'imagePath'
    - path to latent vectors folder
    - functions:
    + hierarchical_partition
    + generate_images
    + face_recognition.face_locations
    + face_recognition.face_encodings
    + fakeid_quality

    Output for this code block:
    - fakeid_dist_avg
    - all_distances
    '''
    cluster_index_dict = cluster_idx_dict(clusters)
    filenames = get_filenames(directory, isFolder=isBackward)
    data_frame = []
    for i, vectorPath in enumerate(filenames):
        d = [{
            'cluster': clusters[i],
            'encoding': data[i],
            'latentVector': np.load(vectorPath).reshape((1, 18, -1))
        }]
        data_frame.extend(d)
    df_stats = pd.DataFrame(data_frame)
    labelList = np.unique(clusters)
    information_loss = []
    fakeid_dist_avg = []
    all_distances = []

    #for cluster_id in range(448,449):
    for cluster_id in tqdm_notebook(range(0, len(labelList)),
                                    desc='[Generating]: '):
        #for cluster_id in labelList:
        ################ print ids in cluster ##########
        if isShown:
            show_cluster_ids(cluster_id, cluster_index_dict, raw_data)
        #################################################
        cluster_vectors = [
            x['latentVector'] for x in data_frame if x['cluster'] == cluster_id
        ]
        if isRandWeight:
            weights = random_list(weight_bound[0], weight_bound[1],
                                  len(cluster_vectors))
            mix = alpha * mix_function(cluster_vectors, weights)
        else:
            mix = alpha * sum(cluster_vectors) / len(cluster_vectors)

        df_stats.loc[df_stats.cluster == cluster_id, 'mix_latent'] = \
            df_stats.loc[df_stats.cluster == cluster_id].apply(lambda row: mix, axis = 1)
        # print(mix.shape)
        img = generate_images(generator_network, mix, z=False)[0]
        quality_fakeid, distances, df_stats = \
             dist_to_fakeid(img, cluster_id, clusters, data, df_stats)

        information_loss.append(quality_fakeid)
        fakeid_dist_avg.append(quality_fakeid / len(cluster_vectors))
        all_distances.append(distances)
        if isShown:
            plt.imshow(img)
            plt.axis('off')
            plt.title("Generated image for cluster %d" % cluster_id)
            plt.show()
    df_stats['disclosure'] = df_stats['fakeid_dist'].map(lambda row: 1
                                                         if row > 0.6 else 0)
    disclosure_prob = len(
        df_stats[df_stats['disclosure'] == 0]) / len(clusters)
    #Disclosure Risks Assessment
    if isAdjustWeight:
        previous_alpha = alpha
        while disclosure_prob > 1 / k:
            current_alpha = previous_alpha * 0.8
            df_stats = adjust_weights(df_stats,
                                      clusters,
                                      data,
                                      raw_data,
                                      cluster_index_dict,
                                      generator_network,
                                      alpha=current_alpha,
                                      beta=0.8,
                                      isShown_re_gen=isShown)
            disclosure_prob = len(
                df_stats[df_stats['disclosure'] == 0]) / len(clusters)
            previous_alpha = current_alpha
        #calculating below variables according to new df_stats
        information_loss = []
        all_distances = []
        fakeid_dist_avg = []
        for label in labelList:
            cluster_stats = df_stats[df_stats.cluster == label]
            information_loss.append(cluster_stats.information_loss.unique()[0])
            fakeid_dist_avg.append(cluster_stats.avg_IL.unique()[0])
            all_distances.append(cluster_stats.fakeid_dist.values.tolist())
        disclosure_prob = len(
            df_stats[df_stats['disclosure'] == 0]) / len(clusters)

    return fakeid_dist_avg, all_distances, information_loss, \
                labelList, disclosure_prob, df_stats
Example 22
            if self.options['bragg_search']:
                OutputFile.create_dataset("processing/hitfinder/peakinfo",
                                          data=self.peaks.astype(np.int))
            OutputFile.close()

        # Conversion to Pickle
        if cctbx and 'pickles' in self.options['output_formats']:
            pixels = flex.int(self.data.astype(np.int32))
            pixel_size = self.detector.pixel1
            data = dpack(data=pixels,
                         distance=self.options['distance'],
                         pixel_size=pixel_size,
                         wavelength=self.options['wavelength'],
                         beam_center_x=self.options['beam_y'] * pixel_size,
                         beam_center_y=self.options['beam_x'] * pixel_size,
                         ccd_image_saturation=self.detector.overload,
                         saturated_value=self.detector.overload)
            data = crop_image_pickle(data)
            OutputFileName = os.path.join(self.result_folder,
                                          'PICKLES_%s' % self.num.zfill(3),
                                          "%s.pickle" % self.root)
            easy_pickle.dump(OutputFileName, data)


if __name__ == '__main__':
    from test import options_SSX
    from utils import get_filenames
    test = HitFinder(options_SSX, None, None)
    images = get_filenames(options_SSX)
    test.data = fabio.open(images[100])
Example 23
    def __init__(self,
                 encoders,
                 decoders,
                 checkpoint_dir,
                 learning_rate,
                 learning_rate_decay_factor,
                 batch_size,
                 keep_best=1,
                 dev_prefix=None,
                 score_function='corpus_scores',
                 name=None,
                 ref_ext=None,
                 pred_edits=False,
                 dual_output=False,
                 binary=None,
                 truncate_lines=True,
                 ensemble=False,
                 checkpoints=None,
                 beam_size=1,
                 len_normalization=1,
                 early_stopping=True,
                 **kwargs):

        self.batch_size = batch_size
        self.character_level = {}
        self.binary = []

        for encoder_or_decoder in encoders + decoders:
            encoder_or_decoder.ext = encoder_or_decoder.ext or encoder_or_decoder.name
            self.character_level[
                encoder_or_decoder.ext] = encoder_or_decoder.character_level
            self.binary.append(encoder_or_decoder.get('binary', False))

        self.char_output = decoders[0].character_level

        self.src_ext = [encoder.ext for encoder in encoders]
        self.trg_ext = [decoder.ext for decoder in decoders]

        self.extensions = self.src_ext + self.trg_ext

        self.ref_ext = ref_ext
        if self.ref_ext is not None:
            self.binary.append(False)

        self.pred_edits = pred_edits
        self.dual_output = dual_output

        self.dev_prefix = dev_prefix
        self.name = name

        self.max_input_len = [encoder.max_len for encoder in encoders]
        self.max_output_len = [decoder.max_len for decoder in decoders]

        if truncate_lines:
            self.max_len = None  # we let seq2seq.get_batch handle long lines (by truncating them)
        else:  # the line reader will drop lines that are too long
            self.max_len = dict(
                zip(self.extensions, self.max_input_len + self.max_output_len))

        self.learning_rate = tf.Variable(learning_rate,
                                         trainable=False,
                                         name='learning_rate',
                                         dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)

        with tf.device('/cpu:0'):
            self.global_step = tf.Variable(0,
                                           trainable=False,
                                           name='global_step')
            self.baseline_step = tf.Variable(0,
                                             trainable=False,
                                             name='baseline_step')

        self.filenames = utils.get_filenames(extensions=self.extensions,
                                             dev_prefix=dev_prefix,
                                             name=name,
                                             ref_ext=ref_ext,
                                             binary=self.binary,
                                             **kwargs)
        utils.debug('reading vocabularies')
        self.vocabs = None
        self.src_vocab, self.trg_vocab = None, None
        self.read_vocab()

        for encoder_or_decoder, vocab in zip(encoders + decoders, self.vocabs):
            if vocab:
                encoder_or_decoder.vocab_size = len(vocab.reverse)

        utils.debug('creating model')

        self.models = []
        if ensemble and checkpoints is not None:
            for i, _ in enumerate(checkpoints, 1):
                with tf.variable_scope('model_{}'.format(i)):
                    model = Seq2SeqModel(encoders,
                                         decoders,
                                         self.learning_rate,
                                         self.global_step,
                                         name=name,
                                         pred_edits=pred_edits,
                                         dual_output=dual_output,
                                         baseline_step=self.baseline_step,
                                         **kwargs)
                    self.models.append(model)
            self.seq2seq_model = self.models[0]
        else:
            self.seq2seq_model = Seq2SeqModel(encoders,
                                              decoders,
                                              self.learning_rate,
                                              self.global_step,
                                              name=name,
                                              pred_edits=pred_edits,
                                              dual_output=dual_output,
                                              baseline_step=self.baseline_step,
                                              **kwargs)
            self.models.append(self.seq2seq_model)

        self.seq2seq_model.create_beam_op(self.models, beam_size,
                                          len_normalization, early_stopping)

        self.batch_iterator = None
        self.dev_batches = None
        self.train_size = None
        self.saver = None
        self.keep_best = keep_best
        self.checkpoint_dir = checkpoint_dir
        self.epoch = None

        self.training = utils.AttrDict()  # used to keep track of training

        try:
            self.reversed_scores = getattr(
                evaluation, score_function).reversed  # the lower the better
        except AttributeError:
            self.reversed_scores = False  # the higher the better

if __name__ == "__main__":
    # ================= get the arguments ====================
    args = _get_args()
    save_csv_logger = os.path.join(args.save_to_dir, args.save_csv_logger)
    weight_save = os.path.join(args.save_to_dir, args.weight_save_prefix)

    image_train_dir = os.path.join(args.data_path, args.image_train_dir)
    mask_train_dir = os.path.join(args.data_path, args.mask_train_dir)
    image_eval_dir = os.path.join(args.data_path, args.image_eval_dir)
    mask_eval_dir = os.path.join(args.data_path, args.mask_eval_dir)

    # # ========================================================
    # # create image train
    filenames_train = get_filenames(image_train_dir)
    filenames_train.sort(key=natural_key)
    image_train = []
    for file in filenames_train:
        image_train.append(ndimage.imread(image_train_dir + file))
    image_train = np.array(image_train)
    print(image_train[0].shape)

    # # ========================================================
    # # create mask train
    filenames_train = get_filenames(mask_train_dir)
    filenames_train.sort(key=natural_key)
    mask_train = []
    for file in filenames_train:
        mask_train.append(ndimage.imread(mask_train_dir + file))
Example 25
def pretty_print(msg):
    sys.stdout.write('\r{msg}\033[K'.format(msg=msg))


input_dir = input('Input directory (will be read recursively): ')
output_dir = input(
    'Output directory (will be created if not present): [./tracks]'
) or './tracks'
length = input('Track length, in seconds: [70] ') or 70
length = int(length)

if not os.path.exists(output_dir):
    os.mkdir(output_dir)

filenames = utils.get_filenames(input_dir)
filenames_length = len(filenames)
for i, filename in enumerate(filenames.values(), start=1):
    pretty_print(
        'Converting file {i}/{total}... [{filename}]'.format(
            i=i,
            total=filenames_length,
            filename=os.path.basename(filename),
        ),
    )
    track = audio.load(filename)
    track = audio.cut(track, length * 1000)
    new_filename = os.path.join(
        output_dir,
        os.path.basename(filename),
    )
Example 26
def main(
    root_path: str,
    bucket_name: str,
    aws_region_name: str = "us-east-2",
    recursive: bool = False,
    replace_if_exists: bool = False,
    aws_profile_name: str = None,
    key_prefix: str = None,
    extensions: tuple = None,
):
    """
    Main program entrypoint - runs the S3 video upload program.

    Parameters
    ----------
    root_path: str
        Root path where the files you want to upload to S3 are.

    bucket_name: str
        Name of the S3 bucket to upload to.

    aws_region_name: str (Optional, default is us-east-2 (Ohio))
        Name of the AWS region name to create the client object in.

    recursive: bool (Optional, default is False)
        Whether or not you want the program to recursively find files in subdirectories.

    replace_if_exists: bool (Optional, default is False)
        Whether to replace existing objects in S3 with a newer version.

    aws_profile_name: str (Optional)
        The name of the AWS profile to use - this will look in the ~/.aws/credentials file on your machine and
        use the credentials provided under the "aws_profile_name" entry.

        If running on your personal machine, you must specify this parameter.

    key_prefix: str (Optional)
        The key prefix of the files you wish to upload. If you do not specify this,
        the files will be uploaded using the absolute path from your computer.

        E.g., if this is not passed in, your files will be located as C:/Users/path/to/files/
        in the S3 bucket.

    extensions: tuple (Optional)
        Valid file extensions to be uploaded.

    Returns
    -------
    None
    """

    try:
        files = []

        boilerplate_warning()

        client = init_aws_client(service_name="s3",
                                 profile_name=aws_profile_name,
                                 region_name=aws_region_name)

        root_path_is_directory = check_path_is_directory(root_path=root_path)

        if root_path_is_directory:
            files = list(
                get_filenames(root_path=root_path,
                              recursive=recursive,
                              extensions=extensions))
        elif not root_path_is_directory:
            if recursive:
                warnings.warn(
                    message=
                    "Warning! Recursive flag does not change application state when uploading a single file!",
                    category=RuntimeWarning,
                )
            files = [root_path]

        upload_files(
            client=client,
            files=files,
            root_path=root_path,
            replace_if_exists=replace_if_exists,
            bucket_name=bucket_name,
            key_prefix=key_prefix,
            root_path_is_directory=root_path_is_directory,
        )

    except Exception as e:
        raise e
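A minimal invocation sketch for this entrypoint; the local directory, bucket name, profile, and extensions below are hypothetical.

if __name__ == "__main__":
    main(
        root_path="videos/",                 # hypothetical local directory
        bucket_name="my-example-bucket",     # hypothetical bucket
        recursive=True,
        replace_if_exists=False,
        aws_profile_name="default",
        extensions=(".mp4", ".mov"),
    )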
Example 27
    octconv.add_oct_conv_block(filters=64, kernel_size=(1, 1))
    octconv.add_final_oct_conv_layer(filters=128, kernel_size=(5, 5))
    octconv.construct_model(
        name=name,
        metrics=metrics
    )
    return octconv.get_model()


# Properties
data_path = "DATASET/BSDS500"
batch_size = 10

srcnn_model = create_srcnn()
# Dataset filepath
data_raw_lr_filenames = get_filenames(data_path + '_BICUBIC')
data_raw_hr_filenames = get_filenames(data_path + '_CROPPED')
batch_generator_train = SRCNNGenerator(data_raw_lr_filenames, data_raw_hr_filenames, batch_size=batch_size)

# calculates bicubic psnr before training prints after every epoch
bicubic_callback = BicubicPSNR(train_generator=batch_generator_train)

# Checkpoint part
checkpoint_path = "training/cp-{epoch:04d}.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Initialize saver
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
Example 28
                        type=str,
                        help='Filename (without extension) of the index model')
    parser.add_argument('tokenizer',
                        choices=list(tokenizers.keys()),
                        type=str,
                        help='Indicates which tokenizer the ranker must use')
    parser.add_argument(
        '-s',
        '--stopwords',
        type=str,
        help=
        'Filename of the stopwords list (ignored if tokenizer is "simple_tokenizer")',
        default='stopwords.txt')
    args = parser.parse_args()

    filenames = utils.get_filenames(args.documents)
    files_exist = len(filenames) != 0
    stopwords_exist = os.path.isfile(args.stopwords)
    has_file = os.path.isfile(args.file) if args.file else True
    model_exist = os.path.isfile('{}.csv'.format(
        args.model)) and os.path.isfile('{}.idx'.format(args.model))
    if files_exist and model_exist and stopwords_exist and has_file:
        used_tokenizer = tokenizers[args.tokenizer]
        if used_tokenizer.has_rule(rules.stopping):
            used_tokenizer.make_rule(rules.stopping, args.stopwords)
        index = Index.segment_on_load(args.model, tokenizer=used_tokenizer)
        ranker = Ranker(index)

        queries = []
        if args.query:
            queries.append(args.query)
Example 29
#
#     return percent_security_name_valid, stats_performance, True


def eval_security_stats(generated, y):
    out = []
    for stat, key in zip(generated, y):
        if stat is None:
            out += [None]
            continue  # float(None) below would raise an uncaught TypeError
        try:
            stat = float(stat)
            key = float(key)
            if (stat - key) == 0:
                out += [1]
                continue
        except ValueError:
            print("value error", stat, key)
            if stat == key:
                out += [1]
                continue
        out += [0]
    print("out", out)
    return out


if __name__ == "__main__":
    filenames = get_filenames(5)
    # filenames = ["/Users/DanielLongo/Dropbox/VC RA Avinika Narayan/Contracts project/coi/Done OCR'd/Kabbage/15866_Kabbage_COI_06302015.pdf"]
    # filenames = ["/Users/DanielLongo/Dropbox/VC RA Avinika Narayan/Contracts project/coi/Done OCR'd/Veralight/veralight_inc072806.pdf"]
    eval_files(filenames)
Example 30
    # Step 3. Get important 'colNames'
    col_names = reader.read_column_names(
        filepath=
        'Z:/1. 프로젝트/2018_삼성SDS_스타크래프트/Supervised/Importance_column_revised.xlsx',
        threshold=3)

    # Step 4. Instantiate a 'DataParser'
    parser = SimpleDataParser(logger=base_logger,
                              units_to_keep=unit_names,
                              columns_to_keep=col_names,
                              output_size=output_size)

    # Step 5. Get names of csv files from which to import replay data
    filedir = 'D:/parsingData/data(선수별)/{}/'.format(player)
    filedir = 'Y:/data(선수별)/{}/'.format(player)
    filelist = get_filenames(filedir, logger=base_logger)

    filenames = []
    for f in filelist:
        if f.split('.')[-1] != 'csv':
            continue
        try:
            versus = reader.get_replay_info(f).get('versus')
            if filter_filenames(versus=versus, against=against):
                filenames.append(f)
        except IndexError as e:
            base_logger.warning(
                "Inappropriate filename, skipping '{}'".format(f))

    base_logger.info(
        '({}/{}) replays will be parsed from this directory.'.format(
Example 31
                        choices=["mse", "mae"])
    parser.add_argument("--weights", type=str, help="initial weight path")
    parser.add_argument("--out",
                        type=str,
                        help="save weight path. Default ./log/{model}.h5")
    args = parser.parse_args()

    # create model
    print("create model")
    model = get_model(args.model, args.mag)

    if args.weights:
        model.load_weights(args.weights)

    # load dataset
    filenames = get_filenames(args.data)
    filenames = select_img_by_size(filenames, args.imsize * args.mag)

    # data generator
    if args.model in ["srcnn", "vdsr", "drcn"]:
        pre_up_scale = True
    else:
        pre_up_scale = False

    # image range
    r = (-1, 1)  #default (0, 1)
    gen = data_generator(filenames,
                         args.batch,
                         preprocess_xy,
                         size=args.imsize,
                         mag=args.mag,
Example 32
# Black background
import matplotlib as mpl
mpl.rcParams['axes.facecolor']='black'

######## Create arrays from the data #####

# If no input parameters are given, the script will read all the files
# in the same directory.
# The parameters may be one or two numbers, meaning either "plot one file"
# or "plot a range of files".
from parser import parse_args
from utils  import get_particle_colors_id, get_particle_colors, get_filenames

options = parse_args()

files = get_filenames(options)
first = True
for file in files:
    data = loadtxt(file)

    ID   = data[:, 0]  # Column 1
    mass = data[:, 1]  # Column 2
    x    = data[:, 2]  # Column 3
    y    = data[:, 3]  # Column 4
    z    = data[:, 4]  # Column 5

    # Sun
    xs = data[0,2]
    ys = data[0,3]
    zs = data[0,4]