Example no. 1
    def normalize_features(self, extraname=''):
        # Setting up folders and filenames
        self._feat_dir = self.get_unnormalized_feat_dir(extraname)
        self._feat_dir_norm = self.get_normalized_feat_dir(extraname)
        utils.create_folder(self._feat_dir_norm)
        normalized_features_wts_file = self.get_normalized_wts_file()

        # pre-processing starts
        print('Estimating weights for normalizing feature files:')
        print('\t\tfeat_dir {}'.format(self._feat_dir))

        spec_scaler = joblib.load(normalized_features_wts_file)
        print('Normalizing feature files:')
        # spec_scaler = joblib.load(normalized_features_wts_file) #load weights again using this command
        for file_cnt, file_name in enumerate(os.listdir(self._feat_dir)):
            print(file_cnt, file_name)
            feat_file = np.load(os.path.join(self._feat_dir, file_name))
            feat_file = spec_scaler.transform(np.concatenate((np.abs(feat_file), np.angle(feat_file)), axis=1))
            np.save(
                os.path.join(self._feat_dir_norm, file_name),
                feat_file
            )
            del feat_file
        print('normalized files written to {} folder and the scaler to {}'.format(
            self._feat_dir_norm, normalized_features_wts_file))
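Every example in this listing leans on a project-specific create_folder (or utils.create_folder) helper whose implementation is never shown and whose signature varies: some variants accept several path segments and return the joined path, others take the path of a file and create its parent directory, and one even takes no arguments. A minimal sketch of the most common single-argument form, assuming it simply wraps os.makedirs and tolerates an already existing directory, could look like this:

import os

def create_folder(folder_path):
    # Hypothetical helper: create the directory (and any missing parents)
    # if it does not exist yet; leave it untouched otherwise.
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
    return folder_path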
Example no. 2
def tool(is_testing, column, mode, option):
    global TRAIN, TEST

    if is_testing:
        TRAIN = TESTING_TRAIN_FILE
        TEST = TESTING_TEST_FILE

    if mode == "purge":
        purge_duplicated_records(column)
    elif mode == "restructure":
        for filetype in ["train", "test"]:
            hierarchical_folder_structure(column, filetype)
    elif mode == "repair":
        repair_missing_records(column)
    elif mode == "aggregation":
        columns = [COLUMNS[c] for c in column.split(",")]
        output_filepath = os.path.join(STATS_PATH, "{}.csv".format("_".join(columns)))
        create_folder(output_filepath)

        aggregation(columns, output_filepath)
    elif mode == "cc":
        column, column_value = option
        filepath = os.path.join(SPLIT_PATH, COLUMNS[column], "train", "{}.csv".format(column_value))

        cc(filepath)
    else:
        log("Not found this mode {}".format(mode), ERROR)
        sys.exit(101)
Example no. 3
def compute_scaler(data_type):
    """Compute and write out scaler of data. 
    """
    workspace = config.workspace

    if data_type == 'train':
        snr = config.Tr_SNR

    # Load data.
    t1 = time.time()
    hdf5_path = os.path.join(workspace, "packed_features", "spectrogram",
                             data_type, "%ddb" % int(snr), "data.h5")
    with h5py.File(hdf5_path, 'r') as hf:
        x = hf.get('x')
        x = np.array(x)  # (n_segs, n_concat, n_freq)

    # Compute scaler.
    (n_segs, n_concat, n_freq) = x.shape
    x2d = x.reshape((n_segs * n_concat, n_freq))
    scaler = StandardScaler(with_mean=True, with_std=True).fit(x2d)
    #    print(scaler.mean_)
    #    print(scaler.scale_)

    # Write out scaler.
    out_path = os.path.join(workspace, "packed_features", "spectrogram",
                            data_type, "%ddb" % int(snr), "scaler.p")
    create_folder(os.path.dirname(out_path))
    pickle.dump(scaler, open(out_path, 'wb'))

    print("Save scaler to %s" % out_path)
    print("Compute scaler finished! %s s" % (time.time() - t1, ))
Example no. 4
    def __init__(self, info_dir=os.path.join(os.path.dirname(__file__),
                                             "request_info"),
                 request_num=100, request_kind='nym',
                 seed='000000000000000000000000Trustee1', thread_num=1,
                 log=False):
        super().__init__(log, seed)

        self.info_dir = info_dir
        self.req_num = request_num
        self.req_kind = request_kind
        if thread_num <= 0:
            self.thread_num = 1
        elif request_num < thread_num:
            self.thread_num = request_num
        else:
            self.thread_num = thread_num

        self.info_file_path = "{}_{}_{}.txt".format(
            self.req_kind + "_requests_info", str(threading.get_ident()),
            time.strftime("%d-%m-%Y_%H-%M-%S"))

        self.info_file_path = os.path.join(self.info_dir,
                                           self.info_file_path)
        self.req_info = list()
        self.threads = list()

        utils.create_folder(self.info_dir)
Example no. 5
def preprocess(mode, columns, n_jobs):
    global WORKSPACE, TRAIN_FILE, TEST_FILE

    if mode:
        if mode.lower() == "producer":
            producer(columns)
        else:
            consumer(n_jobs=n_jobs)
    else:
        queue = Queue.Queue()
        for filepath in [TRAIN_FILE, TEST_FILE]:
            df = pd.read_csv(filepath)

            for column in columns.split(","):
                column = COLUMNS[column]

                output_folder = os.path.join(WORKSPACE, "split", column, os.path.basename(filepath).replace(".csv", ""))
                create_folder(os.path.join(output_folder, "1.txt"))

                for n in range(0, n_jobs):
                    thread = SplitThread(kwargs={"df": df, "queue": queue})
                    thread.setDaemon(True)
                    thread.start()

                for unique_value in df[column].unique():
                    output_filepath = os.path.join(output_folder, "{}.csv".format(unique_value))

                    if os.path.exists(output_filepath):
                        log("Found {} so skipping it".format(output_filepath), INFO)
                    else:
                        queue.put((output_filepath, None, column, unique_value, None))
                        log("Put {} into the queue".format(output_filepath), INFO)

                queue.join()
Example no. 6
def reduce_predicates(folder_name_pred, output_file_name, file_name_filters=""):
    """
    Goes through all files in the folder, reads their content, and checks whether all predicates in each file are true.
    Writes the reduced result to the output file.
    """

    # Creates output file
    create_folder(const.FOLDER_SORT_REDUCTION)
    file_name_reduction = "%s%s" % (const.FOLDER_SORT_REDUCTION, output_file_name)
    file_reduction = open(file_name_reduction, "w+")

    for file_name in os.listdir(folder_name_pred):
        if not verify_file_name(file_name, file_name_filters, const.FILE_EXTENSION):
            continue

        # Reduces all predicates to a single "True" or "False" value
        with open(folder_name_pred + file_name) as file_sort:
            file_content = file_sort.read()
            predicates_true = all(int(predicate) for predicate in file_content.split())

            sort_name = file_name.replace("_", " ")
            sort_output = "%s%s%s" % (sort_name, const.SEPARATOR, predicates_true)
            print(sort_output, file=file_reduction)

    file_reduction.close()
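For illustration, assuming const.SEPARATOR is ";" and const.FILE_EXTENSION is ".txt" (both hypothetical values), a predicate file named merge_sort.txt containing the tokens

1 1 0 1

would be reduced to the single output line below; note that only underscores are replaced, so the file extension remains part of the sort name:

merge sort.txt;False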
Example no. 7
def main(argv):
    # arguments parsing
    parser = argparse.ArgumentParser()
    parser.add_argument('-c',
                        '--config',
                        help="config file (default: config/development.conf",
                        default="./config/development.conf")
    args = parser.parse_args()
    config = utils.read_config(args.config)

    years = range(config['scraping']['years_range'][0],
                  config['scraping']['years_range'][1] + 1)
    n_proc = config['scraping']['n_proc']

    # create the folders in which the poster will be downloaded
    for year in years:
        utils.create_folder('{}/{}/posters'.format(PATH_IMGS, year))
        utils.create_folder('{}/{}/thumbnails'.format(PATH_IMGS, year))

    # Download the posters with multiprocessing (much faster than a single process)
    print('Retrieve url of posters')
    with Pool(n_proc) as p:
        yearly_urls = p.map(get_yearly_url_imgs, years)
    yearly_urls = list(itertools.chain.from_iterable(yearly_urls))

    # push to db
    session = db_manager.get_db(config['general']['db_uri'])
    objects = [db_manager.Poster(x) for x in yearly_urls]
    session.bulk_save_objects(objects)
    session.commit()
Example no. 8
def generate_images_for_single_image_masks(dicom_images, inference_results,
                                           output_folder):
    """ This function will save images to disk to preview how a mask looks on the input images.
        It saves one image for each input DICOM file with the corresponding `inference_results` mask
        applied as overlay.
        
        - dicom_images: Array of DCM_Image or path to a folder with images
        - inference_results: Array with mask buffers (one for each image)
        - output_folder: Where the output images will be saved 

        The difference with `generate_images_with_masks` is that `generate_images_with_masks` applies each mask to the whole
        volume while this functions applies each mask to one image.
    """
    images, masks = _get_images_and_masks(dicom_images, inference_results)
    create_folder(output_folder)

    mask_alpha = 0.5
    for index, (image, mask) in enumerate(zip(images, masks)):
        dcm = pydicom.dcmread(image.path)
        pixels = _get_pixels(dcm)
        max_value = np.iinfo(pixels.dtype).max

        image_mask = mask
        pixels = np.reshape(pixels, (-1, 3))

        pixels[image_mask > 128] = pixels[image_mask > 128] * (1 - mask_alpha) + \
            (mask_alpha * np.array(get_colors(0, max_value)).astype(float)).astype(np.uint8)

        output_filename = os.path.join(
            output_folder,
            str(index) + os.path.basename(os.path.normpath(image.path)))
        output_filename += '.png'

        pixels = np.reshape(pixels, (dcm.Rows, dcm.Columns, 3))
        plt.imsave(output_filename, pixels)
Example no. 9
def save_cache(obj, filepath, is_json=False, is_hdb=False):
    if is_hdb:
        filepath += ".hdb"

        hdb = db.DB()
        hdb.open(filepath, None, db.DB_HASH, db.DB_CREATE)

        for test_id, info in obj.items():
            hdb.put(str(test_id), pickle.dumps(info))

        hdb.sync()
        hdb.close()

        log("Save cache in BerkeleyDB format({})".format(filepath), INFO)
    elif is_json:
        filepath += ".json.bz2"

        with BZ2File(filepath, "wb") as OUTPUT:
            json.dump(obj, OUTPUT)

        log("Save cache in JSON format({})".format(filepath), INFO)
    else:
        create_folder(filepath)
        with open(filepath, "wb") as OUTPUT:
            pickle.dump(obj, OUTPUT)

        log("Save {}'s cache in {}".format(obj.__class__, filepath), INFO)
Example no. 10
def _split_originals(AUDIO_FILE_PATH):
    output_folder = "/audio_files/dataset/original_splits"
    if os.path.exists(output_folder):
        return output_folder
    audio_files = list()
    for root, dirnames, filenames in os.walk(AUDIO_FILE_PATH):
        for f in filenames:
            ext = f.split('.')[-1]
            if ext in ['mp3', 'wav', 'm4a']:
                file_path = os.path.join(root, f)
                audio_files.append(file_path)
    np.random.shuffle(audio_files)
    train_cnt = int(np.ceil(len(audio_files) * 0.8))

    for src_path in audio_files:
        folder_name = ''
        if train_cnt > 0:
            train_cnt -= 1
            folder_name = 'train'
        else:
            folder_name = 'test'

        dirname = os.path.basename(os.path.dirname(src_path))
        outname = os.path.basename(src_path)
        dst_path = os.path.join(output_folder, folder_name, dirname)
        utils.create_folder(dst_path)
        dst_path = os.path.join(dst_path, outname)
        shutil.copyfile(src_path, dst_path)
        print(src_path, dst_path)
    return output_folder
Example no. 11
    def analyze_model(self, epoch):
        filename_model = os.path.join(self.dir_models, 'epoch_{}.pth'.format(epoch))
        g = Generator(self.nb_channels_first_layer, self.dim)
        g.cuda()
        g.load_state_dict(torch.load(filename_model))
        g.eval()

        nb_samples = 50
        batch_z = np.zeros((nb_samples, 32 * self.nb_channels_first_layer, 4, 4))
        # batch_z = np.maximum(5*np.random.randn(nb_samples, 32 * self.nb_channels_first_layer, 4, 4), 0)
        # batch_z = 5 * np.random.randn(nb_samples, 32 * self.nb_channels_first_layer, 4, 4)

        for i in range(4):
            for j in range(4):
                batch_z[:, :, i, j] = create_path(nb_samples)
        # batch_z[:, :, 0, 0] = create_path(nb_samples)
        # batch_z[:, :, 0, 1] = create_path(nb_samples)
        # batch_z[:, :, 1, 0] = create_path(nb_samples)
        # batch_z[:, :, 1, 1] = create_path(nb_samples)
        batch_z = np.maximum(batch_z, 0)

        z = Variable(torch.from_numpy(batch_z)).type(torch.FloatTensor).cuda()
        temp = g.main._modules['4'].forward(z)
        for i in range(5, 10):
            temp = g.main._modules['{}'.format(i)].forward(temp)

        g_z = temp.data.cpu().numpy().transpose((0, 2, 3, 1))

        folder_to_save = os.path.join(self.dir_experiment, 'epoch_{}_path_after_linear_only00_path'.format(epoch))
        create_folder(folder_to_save)

        for idx in range(nb_samples):
            filename_image = os.path.join(folder_to_save, '{}.png'.format(idx))
            Image.fromarray(np.uint8((g_z[idx] + 1) * 127.5)).save(filename_image)
Example no. 12
    def __init__(self, parameters):
        dir_datasets = os.path.expanduser('~/datasets')
        dir_experiments = os.path.expanduser('~/experiments')

        dataset = parameters['dataset']
        train_attribute = parameters['train_attribute']
        test_attribute = parameters['test_attribute']
        embedding_attribute = parameters['embedding_attribute']

        self.dim = parameters['dim']
        self.nb_channels_first_layer = parameters['nb_channels_first_layer']

        name_experiment = parameters['name_experiment']

        self.dir_x_train = os.path.join(dir_datasets, dataset, '{0}'.format(train_attribute))
        self.dir_x_test = os.path.join(dir_datasets, dataset, '{0}'.format(test_attribute))
        self.dir_z_train = os.path.join(dir_datasets, dataset, '{0}_{1}'.format(train_attribute, embedding_attribute))
        self.dir_z_test = os.path.join(dir_datasets, dataset, '{0}_{1}'.format(test_attribute, embedding_attribute))

        self.dir_experiment = os.path.join(dir_experiments, 'gsn_hf', name_experiment)
        self.dir_models = os.path.join(self.dir_experiment, 'models')
        self.dir_logs = os.path.join(self.dir_experiment, 'logs')
        create_folder(self.dir_models)
        create_folder(self.dir_logs)

        self.batch_size = 128
        self.nb_epochs_to_save = 1
Example no. 13
    def analyze_model(self, epoch):
        filename_model = os.path.join(self.dir_models, 'epoch_{}.pth'.format(epoch))
        g = Generator(self.nb_channels_first_layer, self.dim)
        g.cuda()
        g.load_state_dict(torch.load(filename_model))
        g.eval()

        nb_samples = 50
        batch_z = np.zeros((nb_samples, 32 * self.nb_channels_first_layer, 4, 4))
        # batch_z = np.maximum(5*np.random.randn(nb_samples, 32 * self.nb_channels_first_layer, 4, 4), 0)
        # batch_z = 5 * np.random.randn(nb_samples, 32 * self.nb_channels_first_layer, 4, 4)

        for i in range(4):
            for j in range(4):
                batch_z[:, :, i, j] = create_path(nb_samples)
        # batch_z[:, :, 0, 0] = create_path(nb_samples)
        # batch_z[:, :, 0, 1] = create_path(nb_samples)
        # batch_z[:, :, 1, 0] = create_path(nb_samples)
        # batch_z[:, :, 1, 1] = create_path(nb_samples)
        batch_z = np.maximum(batch_z, 0)

        z = Variable(torch.from_numpy(batch_z)).type(torch.FloatTensor).cuda()
        temp = g.main._modules['4'].forward(z)
        for i in range(5, 10):
            temp = g.main._modules['{}'.format(i)].forward(temp)

        g_z = temp.data.cpu().numpy().transpose((0, 2, 3, 1))

        folder_to_save = os.path.join(self.dir_experiment, 'epoch_{}_path_after_linear_only00_path'.format(epoch))
        create_folder(folder_to_save)

        for idx in range(nb_samples):
            filename_image = os.path.join(folder_to_save, '{}.png'.format(idx))
            Image.fromarray(np.uint8((g_z[idx] + 1) * 127.5)).save(filename_image)
Example no. 14
        def _generate_path(dir_z, dir_x, train_test):
            dataset = EmbeddingsImagesDataset(dir_z, dir_x)
            fixed_dataloader = DataLoader(dataset, 2, shuffle=True)
            fixed_batch = next(iter(fixed_dataloader))

            z0 = fixed_batch['z'][[0]].numpy()
            z1 = fixed_batch['z'][[1]].numpy()

            batch_z = np.copy(z0)

            nb_samples = 100

            interval = np.linspace(0, 1, nb_samples)
            for t in interval:
                if t > 0:
                    zt = normalize((1 - t) * z0 + t * z1)
                    batch_z = np.vstack((batch_z, zt))

            z = Variable(torch.from_numpy(batch_z)).type(torch.FloatTensor).cuda()
            g_z = g.forward(z)

            # filename_images = os.path.join(self.dir_experiment, 'path_epoch_{}_{}.png'.format(epoch, train_test))
            # temp = make_grid(g_z.data, nrow=nb_samples).cpu().numpy().transpose((1, 2, 0))
            # Image.fromarray(np.uint8((temp + 1) * 127.5)).save(filename_images)

            g_z = g_z.data.cpu().numpy().transpose((0, 2, 3, 1))

            folder_to_save = os.path.join(self.dir_experiment, 'epoch_{}_{}_path'.format(epoch, train_test))
            create_folder(folder_to_save)

            for idx in range(nb_samples):
                filename_image = os.path.join(folder_to_save, '{}.png'.format(idx))
                Image.fromarray(np.uint8((g_z[idx] + 1) * 127.5)).save(filename_image)
Example no. 15
    def init_log_file(path: str):
        """
        Initialize the log file.
        """
        RequestsSender.close_log_file()
        utils.create_folder(os.path.dirname(path))
        RequestsSender.__log_file = open(path, 'w')
Example no. 16
def resample_folder(inpath,
                    outpath,
                    timeColHeader,
                    gapTolerance=np.inf,
                    samplingRate=None):
    '''
    Resample every CSV file found in a folder and write the results to another folder.

    :param inpath: folder containing the input CSV files
    :param outpath: folder the resampled CSV files are written to
    :param timeColHeader: name of the timestamp column
    :param gapTolerance: maximum gap tolerated between consecutive samples
    :param samplingRate: target sampling rate for the resampled data
    :return: None
    '''
    create_folder(outpath)
    files = list_files_in_directory(inpath)

    for file in files:
        if not file.startswith('.'):
            dataDf = pd.read_csv(os.path.join(inpath, file))

            if len(dataDf):
                if 'date' in dataDf.columns:
                    dataDf = dataDf.drop(columns=['date'])
                # print(dataDf.dtypes)
                # dataDf = dataDf.astype({"Time": float})
                newDf = resample(dataDf,
                                 timeColHeader,
                                 samplingRate,
                                 gapTolerance=gapTolerance,
                                 fixedTimeColumn=None)
                newDf.to_csv(os.path.join(outpath, file), index=None)
Example no. 17
def reduce_predicates(folder_name_pred,
                      output_file_name,
                      file_name_filters=""):
    """
    Goes through all files in the folder, reads their content, and checks whether all predicates in each file are true.
    Writes the reduced result to the output file.
    """

    # Creates output file
    create_folder(const.FOLDER_SORT_REDUCTION)
    file_name_reduction = "%s%s" % (const.FOLDER_SORT_REDUCTION,
                                    output_file_name)
    file_reduction = open(file_name_reduction, "w+")

    for file_name in os.listdir(folder_name_pred):
        if not verify_file_name(file_name, file_name_filters,
                                const.FILE_EXTENSION):
            continue

        # Reduces all predicates to a single "True" or "False" value
        with open(folder_name_pred + file_name) as file_sort:
            file_content = file_sort.read()
            predicates_true = all(
                int(predicate) for predicate in file_content.split())

            sort_name = file_name.replace("_", " ")
            sort_output = "%s%s%s" % (sort_name, const.SEPARATOR,
                                      predicates_true)
            print(sort_output, file=file_reduction)

    file_reduction.close()
Example no. 18
def combine_bands():
    '''Combines all specified bands per file'''

    print("-> Start combining bands...")

    # Create Out folder
    TEMP_FOLDERS["combined_bands"] = create_folder(OUT_FOLDER,
                                                   "03_combined_bands")

    # Iterate over each day
    for day in listdir(TEMP_FOLDERS["unzipped"]):

        # Create one folder per day
        folder_day = create_folder(TEMP_FOLDERS["combined_bands"], day)

        # Iterate over each granule
        for granule in listdir("{0}/{1}".format(TEMP_FOLDERS["extracted"],
                                                day)):

            # Get list of input files
            band_path_list = get_paths_for_files_in_folder(
                "{0}/{1}/{2}/".format(TEMP_FOLDERS["extracted"], day, granule))
            band_path_list.sort()  # make sure bands are always in same order

            # Build out_path (filename without file extension and band number)
            out_filename = "{0}.vrt".format(
                band_path_list[0].split("/")[-1].split(".")[0][:-4])
            out_path = "{0}/{1}".format(folder_day, out_filename)

            # Combine all dataset bands in one vrt-file
            gdal.BuildVRT(out_path, band_path_list, separate=True, srcNodata=0)
            print(" - Combined bands for {0}".format(out_filename))

    print("-> Finished combining bands.")
Example no. 19
    def __init__(self, parameters):
        dir_datasets = os.path.expanduser('~/datasets')
        dir_experiments = os.path.expanduser('~/experiments')

        dataset = parameters['dataset']
        train_attribute = parameters['train_attribute']
        test_attribute = parameters['test_attribute']
        embedding_attribute = parameters['embedding_attribute']

        self.dim = parameters['dim']
        self.nb_channels_first_layer = parameters['nb_channels_first_layer']

        name_experiment = parameters['name_experiment']

        self.dir_x_train = os.path.join(dir_datasets, dataset,
                                        '{0}'.format(train_attribute))
        self.dir_x_test = os.path.join(dir_datasets, dataset,
                                       '{0}'.format(test_attribute))
        self.dir_z_train = os.path.join(
            dir_datasets, dataset, '{0}_{1}'.format(train_attribute,
                                                    embedding_attribute))
        self.dir_z_test = os.path.join(
            dir_datasets, dataset, '{0}_{1}'.format(test_attribute,
                                                    embedding_attribute))

        self.dir_experiment = os.path.join(dir_experiments, 'gsn_hf',
                                           name_experiment)
        self.dir_models = os.path.join(self.dir_experiment, 'models')
        self.dir_logs = os.path.join(self.dir_experiment, 'logs')
        create_folder(self.dir_models)
        create_folder(self.dir_logs)

        self.batch_size = 128
        self.nb_epochs_to_save = 1
Example no. 20
def view_samples(args):
    # visualize some sample images
    all_image_path = glob(os.path.join(args.path, '*', '*.jpg'))
    imageid_path_dict = {
        os.path.splitext(os.path.basename(x))[0]: x
        for x in all_image_path
    }

    save_path = "./images/sample"
    create_folder(save_path)

    fig = plt.figure(figsize=(5, 5))
    columns, rows = 3, 2
    start, end = 0, len(imageid_path_dict)
    ax = []
    import random
    for i in range(columns * rows):
        k = random.randint(start, end - 1)  # randint is inclusive of both endpoints
        img = mpimg.imread(all_image_path[k])
        # create subplot and append to ax
        ax.append(fig.add_subplot(rows, columns, i + 1))
        plt.xticks([])
        plt.yticks([])
        plt.imshow(img)
    plt.tight_layout()
    plt.title('Sample input images', fontdict={'size': 10})
    plt.savefig(os.path.join(save_path, 'input_image.png'))

    # Checking the size and number of channels in the image
    arr = np.asarray(Image.open(all_image_path[10]))
    print(f"The shape of each image is {arr.shape}")
Example no. 21
    def extract_content(self):
        classes = self.extract_classes()
        for klass in classes[1:]:  # Exclude ONLINE CLASS
            folder_name = remove_accents(klass['class'])
            create_folder(folder_name)
            print('Extracting Class: {0}'.format(klass['class']))
            self.browser.get('https://unipac-bomdespacho.blackboard.com{0}'.format(klass['href']))
            self.browser.find_element_by_id('header::0-whatsNewView::CO').click()  # Open content list
            block_class_contents = self.browser.find_element_by_id('block::0-whatsNewView::CO')
            class_contents = block_class_contents.find_elements_by_css_selector(
                "a[onclick*='nautilus_utils.actionSelected']"
            )
            i_content = 0
            for i_content in range(i_content, len(class_contents)):
                try:
                    block_classes_contents = self.browser.find_element_by_id('block::0-whatsNewView::CO')
                    class_contents = block_classes_contents.find_elements_by_css_selector(
                        "a[onclick*='nautilus_utils.actionSelected']"
                    )
                    class_contents[i_content].click()
                    self.check_visibility(By.CLASS_NAME, "individualContent-link")
                    file_link = self.browser.find_element_by_class_name('individualContent-link').get_attribute('href')
                    cookies = self.browser.get_cookies()
                    download(cookies, file_link, folder_name)
                    self.browser.back()
                    self.check_visibility(By.ID, "block::0-whatsNewView::CO")
                except TimeoutException:
                    print("Error in: {0} - {1}".format(klass['class'], klass['href']))
Example no. 22
def main():
    img_dirs = [
        './img/org', './img/bg_0', './img/bg_127', './img/bg_255',
        './img/obj_0', './img/obj_127', './img/obj_255'
    ]

    arch = "resnet152"
    target_layer = "layer4"
    cuda = True
    topk = 1
    for img_dir in img_dirs:

        output_dir = "./cam_{}_{}_{}".format(arch, target_layer,
                                             img_dir.replace("/", '_'))
        create_folder(output_dir)
        image_paths = []

        for i in range(0, 50000):
            filename = "ILSVRC2012_val_000{:05}.JPEG".format(i + 1)
            image_path = os.path.join(img_dir, filename)
            image_paths.append(image_path)

        for images in list(chunks(image_paths, n=16)):
            # process_a_batch(images, target_layer, arch, topk, output_dir, cuda)
            arguments = [
                "python3", "run.py", "-a", "{}".format(arch), "-t",
                "{}".format(target_layer), "-o", "{}".format(output_dir), "-i"
            ]
            for img in images:
                # arguments.append("-i")
                arguments.append("{}".format(img))
            print(arguments)
            subprocess.call(arguments)
Example no. 23
def generate_images_with_boxes(images, boxes, output_folder):
    # Generate images for boxes. `boxes` should be an array of dict
    # Format: {'label': '?', 'SOPInstanceUID': dcm.SOPInstanceUID, 'top_left': [5, 5], 'bottom_right': [10, 10]}
    create_folder(output_folder)

    for index, image in enumerate(images):
        dcm = pydicom.dcmread(image.path)
        pixels = get_pixels(dcm)
        pixels = np.reshape(pixels, (dcm.Rows, dcm.Columns, 3))

        pil_image = Image.fromarray(pixels)
        draw = ImageDraw.Draw(pil_image)
        image_boxes = [
            box for box in boxes if image.instanceUID == box['SOPInstanceUID']
        ]

        for box in image_boxes:
            # apply box
            ul = box['top_left']
            br = box['bottom_right']
            points = [
                tuple(ul), (br[0], ul[1]),
                tuple(br), (ul[0], br[1]),
                tuple(ul)
            ]
            draw.line(points, fill="red", width=5)

            boxes.remove(box)

        # write image to output folder
        output_filename = os.path.join(
            output_folder,
            str(index) + '_' + os.path.basename(os.path.normpath(image.path)))
        output_filename += '.png'
        pil_image.save(output_filename)
Example no. 24
def visualise_epoch(data_container, model, args, cuda, base_path):
    base_path_ae = os.path.join(base_path, 'ae_vis')
    base_path_dualatt = os.path.join(base_path, 'dualatt_vis')
    create_folder(base_path_ae)
    create_folder(base_path_dualatt)

    model.eval()
    i = 0
    for x, _, audio_names in tqdm(data_container['val_dataloader']):
        if cuda:
            x = x.cuda()
        out_dict = model(x)
        y_pred = out_dict['y_pred'].cpu().detach().numpy()
        x_rec = out_dict['x_rec'].cpu().detach().numpy()
        class_x = out_dict['class_wise_input'].cpu().detach().numpy()
        mel_attw = out_dict['mel_attw'].cpu().detach().numpy()
        time_attw = out_dict['time_attw'].cpu().detach().numpy()
        mel_x = out_dict['mel_x'].cpu().detach().numpy()
        time_x = out_dict['time_x'].cpu().detach().numpy()
        x = x.cpu().detach().numpy()
        # here i maintains sample count (global)
        # here j maintains count inside batch (local)
        for j in range(x.shape[0]):
            reconstruction_plot(x[j], x_rec[j], args, audio_names[j],
                                base_path_ae)
            attention_plot(mel_x[j], mel_attw[j], time_x[j], time_attw[j],
                           args, audio_names[j], base_path_dualatt)
            i = i + 1
Example no. 25
def _place_info_index(df, range_x, range_y, size_x, size_y, output_folder):
    for window_size_x, window_size_y in zip(size_x, size_y):
        folder = output_folder + "_windown_size={},{}".format(window_size_x, window_size_y)

        for x in range_x:
            start_x, end_x = x, min(11, x+window_size_x)
            c2 = (df["x"].values >= start_x) & (df["x"].values < end_x)

            for y in range_y:
                start_y, end_y = y, min(11, y+window_size_y)
                c3 = (df["y"].values >= start_y) & (df["y"].values < end_y)

                if df[c2 & c3].shape[0] > 0:
                    filepath_output = os.path.join(folder, "{}_{}.csv".format(start_x, start_y))
                    if not os.path.exists(filepath_output):
                        create_folder(filepath_output)

                        with open(filepath_output, "wb") as OUTPUT:
                            place_ids, counts = np.unique(df[c2 & c3]["place_id"].values, return_counts=True)
                            for place_id, count in zip(place_ids, counts):
                                OUTPUT.write("{},{}\n".format(place_id, count))

                        log("Save file in {}".format(filepath_output), INFO)
                    else:
                        log("Skip {}".format(filepath_output), INFO)
Example no. 26
def reproject():
    '''Reprojects all UTM zones into specified reference system'''

    print("-> Start reprojection...")

    # Create Out folder with subfolders for each day
    TEMP_FOLDERS["reproject"] = create_folder(OUT_FOLDER, "05_reproject")

    for day in listdir(TEMP_FOLDERS["unzipped"]):

        # Create one folder per day
        folder_day = create_folder(TEMP_FOLDERS["reproject"], day)

        for utm_file in listdir("{0}/{1}".format(TEMP_FOLDERS["utm"], day)):

            # Get input path
            in_path = "{0}/{1}/{2}".format(TEMP_FOLDERS["utm"], day, utm_file)

            # Build output path
            out_filename = "{0}_epsg{1}.vrt".format(
                utm_file.split(".")[0], OUT_EPSG)
            out_path = "{0}/{1}".format(folder_day, out_filename)

            # Reproject each utm file to set epsg
            gdal.Warp(out_path,
                      in_path,
                      dstSRS="EPSG:{0}".format(OUT_EPSG),
                      format="vrt")
            print(" - Reprojected {0}".format(out_filename))

    print("-> Finished reprojection.")
Example no. 27
def prepare_mozilla_common_data(AUDIO_FILE_PATH):
    OUTPUT_FOLDER = "/audio_files/common_voice_corpus_1"
    exists = os.path.exists(OUTPUT_FOLDER)
    if not exists or (exists and len(os.listdir(OUTPUT_FOLDER)) <= 20000):
        _fix_duration_and_convert_audio(AUDIO_FILE_PATH,
                                        OUTPUT_FOLDER,
                                        file_size_thresh=17)

    audio_clips = os.listdir(OUTPUT_FOLDER)  # total clips
    OUTPUT_FOLDER_2 = "/audio_files/dataset/classes/non-target"

    exists = os.path.exists(OUTPUT_FOLDER_2)
    if not exists or (exists and len(os.listdir(OUTPUT_FOLDER_2)) != 10000):
        utils.create_folder(OUTPUT_FOLDER_2)

        #Choose 10000 audio clips randomly
        # np.random.shuffle(audio_clips)
        # audio_clips = audio_clips[:10000]

        #save audio clips
        for aud_clip in audio_clips:
            src_file = os.path.join(OUTPUT_FOLDER, aud_clip)
            dst_file = os.path.join(OUTPUT_FOLDER_2, aud_clip)
            shutil.copyfile(src_file, dst_file)
            print('copying', src_file, dst_file)
    else:
        print("Noting to do")
Example no. 28
def image_downloader(img_links, folder_name):
    img_names = []

    try:
        parent = os.getcwd()
        try:
            folder = os.path.join(os.getcwd(), folder_name)
            utils.create_folder(folder)
            os.chdir(folder)
        except Exception:
            print("Error in changing directory.")

        for link in img_links:
            img_name = "None"

            if link != "None":
                img_name = (link.split(".jpg")[0]).split("/")[-1] + ".jpg"

                # this is the image id when there's no profile pic
                if img_name == selectors.get("default_image"):
                    img_name = "None"
                else:
                    try:
                        urllib.request.urlretrieve(link, img_name)
                    except Exception:
                        img_name = "None"

            img_names.append(img_name)

        os.chdir(parent)
    except Exception:
        print("Exception (image_downloader):", sys.exc_info()[0])

    return img_names
Example no. 29
def write_result(initial, word, results):
    create_folder(BASE_FOLDER, MODEL_NAME, TIME_STR, initial)
    processed_results = sorted(results, key=lambda k: (k[0], -k[1]))
    write_data(
        f"./{BASE_FOLDER}/{MODEL_NAME}/{TIME_STR}/{initial}/{word}.txt",
        processed_results,
    )
Example no. 30
            def _generate_path(dir_z, dir_x, train_test):
                dataset = EmbeddingsImagesDataset(dir_z, dir_x)
                fixed_dataloader = DataLoader(dataset, 2, shuffle=True)
                fixed_batch = next(iter(fixed_dataloader))

                z0 = fixed_batch['z'][[0]].numpy()
                z1 = fixed_batch['z'][[1]].numpy()

                batch_z = np.copy(z0)

                nb_samples = 100

                interval = np.linspace(0, 1, nb_samples)
                for t in interval:
                    if t > 0:
                        # zt = normalize((1 - t) * z0 + t * z1)
                        zt = (1 - t) * z0 + t * z1
                        batch_z = np.vstack((batch_z, zt))

                z = torch.from_numpy(batch_z).float().cuda()
                g_z = g.forward(z)

                # filename_images = os.path.join(self.dir_experiment, 'path_epoch_{}_{}.png'.format(epoch, train_test))
                # temp = make_grid(g_z.data, nrow=nb_samples).cpu().numpy().transpose((1, 2, 0))
                # Image.fromarray(np.uint8((temp + 1) * 127.5)).save(filename_images)

                g_z = g_z.data.cpu().numpy().transpose((0, 2, 3, 1))

                folder_to_save = dir_to_save / 'epoch_{}_{}_path'.format(epoch_to_load, train_test)
                create_folder(folder_to_save)

                for idx in range(nb_samples):
                    filename_image = os.path.join(folder_to_save, '{}.png'.format(idx))
                    Image.fromarray(np.uint8((g_z[idx] + 1) * 127.5)).save(filename_image)
Example no. 31
    def train(self, X_train, X_test, y_train, y_test):
        # compile model
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

        num_outputs = self.model.outputs[0].shape[-1]

        if num_outputs == 1:
            loss = 'binary_crossentropy'
            metrics = [
                tf.keras.metrics.BinaryAccuracy(),
                tf.keras.metrics.Precision(),
                tf.keras.metrics.Recall()
            ]
        else:
            loss = 'categorical_crossentropy'
            y_train = tf.one_hot(y_train, num_outputs)
            y_test = tf.one_hot(y_test, num_outputs)

            metrics = [
                tf.keras.metrics.CategoricalAccuracy(),
                tf.keras.metrics.Precision(),
                tf.keras.metrics.Recall()
            ]

        self.model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

        # self.model.summary()
        callbacks = list()

        log_dir = "output/logs"
        create_folder(log_dir)
        callbacks.append(tf.keras.callbacks.TensorBoard(log_dir=log_dir))

        checkpoint_filepath = 'output/checkpoints/chk-{epoch:02d}-{val_loss:.8f}.ckpt'
        checkpoint_dir = os.path.dirname(checkpoint_filepath)
        create_folder(checkpoint_dir)
        model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
            filepath=checkpoint_filepath,
            save_weights_only=True,
            monitor='val_loss',
            mode='min',
            save_best_only=True,
            verbose=1)

        callbacks.append(model_checkpoint_callback)

        # train model
        try:
            self.model.fit(X_train,
                           y_train,
                           validation_data=(X_test, y_test),
                           batch_size=168,
                           epochs=100,
                           callbacks=callbacks)
        except KeyboardInterrupt:
            pass

        self.export()  # load model with best weights and export
        self.model.evaluate(X_test, y_test, verbose=2)
Example no. 32
def _time_split_data(df, new_column, value, output_folder):
    idx = (df[new_column] == value)

    filepath_output = os.path.join(output_folder, "{}.csv".format(value))
    create_folder(filepath_output)

    df[idx].to_csv(filepath_output, index=False)
    log("Save file in {}".format(filepath_output), INFO)
Example no. 33
    def save_file(self, message_shown=True):
        create_folder()
        f = open("logs/data_third.txt", "w")
        f.write(str(self.result_value))
        if message_shown:
            # Message (Ukrainian): "Information saved to the file logs/data_third.txt"
            show_message("Інформацію збережено до файлу logs/data_third.txt",
                         QMessageBox.Information)
        f.close()
Example no. 34
    def load(self, dir_name):
        rmtree(self._log_dir)
        create_folder(self._log_dir)

        onlyfiles = [f for f in listdir(dir_name) if isfile(join(dir_name, f))]

        for file in onlyfiles:
            copyfile(join(dir_name, file), join(self._log_dir, file))
Example no. 35
    def _seperate_one(self, _file):
        time = self._get_time(_file)
        folder = self._get_time_folder(time)
        fd_path = path_join([self.mv_path, folder, ''])
        if not path_exists(fd_path):
            create_folder(fd_path)
        print _file, fd_path
        print copy(_file, fd_path)
Example no. 36
    def save_file(self, message_shown=True):
        create_folder()
        f = open("logs/data_fourth.txt", "w")
        f.write(str(self.logic.get_z()))
        if message_shown:
            # Message (Ukrainian): "Information saved to the file logs/data_fourth.txt"
            show_message("Інформацію збережено до файлу logs/data_fourth.txt",
                         QMessageBox.Information)
        f.close()
Example no. 37
    def submit(self, model, filepath, mode="training", n_top=10):
        create_folder(filepath)
        (training_dataset, testing_dataset), results, predicted_proba = self.get_dataset(), None, None

        if mode == "training":
            if self.method == "classifier":
                if self.objective.find("binary") > -1:
                    predicted_proba = model.predict_proba(training_dataset)[:,1]
                else:
                    predicted_proba = model.predict_proba(training_dataset)
            elif self.method == "regressor":
                predicted_proba = model.predict(training_dataset)
            else:
                raise NotImplementedError

            estimator = model
            if hasattr(model, "best_estimator_"):
                estimator = model.best_estimator_

            pool = [dict(zip(estimator.classes_, probas)) for probas in predicted_proba]
            for idx, pair in enumerate(pool):
                class_names = []

                for class_name, class_proba in sorted(pair.items(), key=(lambda (k, v): v), reverse=True)[:n_top]:
                    class_names.append(class_name)

                pool[idx] = " ".join(class_names)

            results = {"Target": self.train_y, "Predicted_Proba": pool}
        else:
            if self.method == "classifier":
                if self.objective.find("binary") > -1:
                    predicted_proba = model.predict_proba(testing_dataset)[:,1]
                else:
                    predicted_proba = model.predict_proba(testing_dataset)
            elif self.method == "regressor":
                predicted_proba = model.predict(testing_dataset)
            else:
                raise NotImplementedError

            estimator = model
            if hasattr(model, "best_estimator_"):
                estimator = model.best_estimator_

            pool = [dict(zip(estimator.classes_, probas)) for probas in predicted_proba]
            for idx, pair in enumerate(pool):
                class_names = []

                for class_name, class_proba in sorted(pair.items(), key=(lambda (k, v): v), reverse=True)[:n_top]:
                    class_names.append(class_name)

                pool[idx] = " ".join(class_names)

            results = {"ID": self.test_id, "Target": pool}

        if not os.path.exists(filepath):
            log("Compile a submission results for kaggle in {}".format(filepath), INFO)
            save_kaggle_submission(results, filepath)
Example no. 38
def split_by_hour(file, NECKLACE_DIR):
    create_folder(NECKLACE_DIR)

    # This is a date days before the study, to remove the 1969 error data.
    starttimestamp = 1000000000000 #September 8, 2001 8:46:40 PM GMT-05:00 DST
    localtz = settings['TIMEZONE']

    print(file)

    df = pd.read_csv(file)

    print('len', len(df), '\n')
    df = df[~df['Time'].isin(['Time'])]
    print('Remove redundant headers...\n')
    print('len', len(df), '\n')
    l1 = len(df)

    df = df.dropna()
    df['Time'] = pd.to_numeric(df['Time'], errors='ignore')
    df = df[df['Time'] > starttimestamp]
    print('len', len(df), '\n')
    l2 = len(df)

    print('# Timestamp 1969 Error Lines: ', str(l1 - l2))

    df = df.sort_values('Time')

    df['date'] = pd.to_datetime(df['Time'],unit='ms')
    df = df.set_index(['date'])
    df.index = df.index.tz_localize('UTC').tz_convert(settings['TIMEZONE'])

    # dt: absolute hour of the first timestamp
    dt = datetime(year = df.index[0].year, month = df.index[0].month, \
                    day = df.index[0].day, hour = df.index[0].hour, minute = 0, second = 0)
    dt = localtz.localize(dt)
    print(df.index[0])
    print(df.index[-1])

    #========================================================================================================
    # split each hour into separate file under day folder
    #========================================================================================================
    startHour = dt
    endHour = dt + timedelta(hours = 1)

    while endHour < df.index[-1] + timedelta(hours = 1):
        dfHr = df[(df.index >= startHour) & (df.index < endHour)]
        
        if len(dfHr):
            file = datetime_to_filename(startHour)
            dfHr.to_csv(os.path.join(NECKLACE_DIR, file))

            print(startHour)
            print(endHour)
            print(len(dfHr))
            print(file)

        startHour += timedelta(hours = 1)
        endHour += timedelta(hours = 1)
Example no. 39
    def run_the_node(self, status_file, status_dict):
        """ Run the step represented by the node and updates the status.json file which gives a live output of the running process.
        It uses the status_file (location of the status file) and the status_dict (python dictionary representing the status.json file)
        to give a live report of the node being processed.
        """

        utils.create_folder(self.output_folder)

        status_dict[self.name] = {}
        status_dict[self.name]["status"] = "in progress"
        status_dict[self.name]["progress"] = 0
        utils.update_json_file(status_file, status_dict)

        cmd_line = []
        cmd_line.append(self.binary_name)
        for option, value in self.add_locations_to_command_line():
            cmd_line.append(option)
            cmd_line.append(value)
        for option, value in self.add_parameters_to_command_line():
            cmd_line.append(option)
            cmd_line.append(value)

        log = open(self.log_dir, 'w')
        # Dealing with DepthMap particular case
        if (self.name == "depth_map"):
            # Dividing the task if needed
            group_size = self.parameters["groupSize"]
            number_of_groups = (self.nb_of_images +
                                (group_size - 1)) // group_size
            for group_iter in range(number_of_groups):
                range_start = group_size * group_iter
                range_size = min(group_size, self.nb_of_images - range_start)
                print("DepthMap Group {}/{} : {}, {}".format(
                    group_iter + 1, number_of_groups, range_start, range_size))
                cmd = cmd_line + [
                    '--rangeStart',
                    str(range_start), '--rangeSize',
                    str(range_size)
                ]
                print(cmd)
                subprocess.run(cmd, stderr=log)
                status_dict[self.name]["progress"] = (
                    (group_iter + 1) / number_of_groups) * 100
                print(status_dict)
                utils.update_json_file(status_file, status_dict)
        else:
            print(cmd_line)
            subprocess.run(cmd_line, stderr=log)
            status_dict[self.name]["progress"] = 100
            utils.update_json_file(status_file, status_dict)

        log.close()

        status_dict[self.name]["status"] = "done"
        status_dict[self.name]["progress"] = 100
        utils.update_json_file(status_file, status_dict)

        return 0
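Example no. 39 above depends on a utils.update_json_file helper (not shown in this listing) to keep status.json in sync after every progress change. A minimal sketch, assuming it simply overwrites the file with the serialized dictionary, might be:

import json

def update_json_file(file_path, data):
    # Hypothetical helper: rewrite the status file so that anyone polling it
    # always sees the latest status and progress values.
    with open(file_path, 'w') as status_file:
        json.dump(data, status_file, indent=4)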
Example no. 40
def get_agent_file(agent_name):
    directory_name = os.environ.get("MODELS_PATH") + "export/" 
    file_name = agent_name + ".json"
    file_path = directory_name + file_name
    remove_file_or_dir(file_path)
    dic = AgentsService.get_instance().create_agent_file(agent_name)
    create_folder(directory_name)
    with open(file_path, "w+") as f:
        json.dump(dic,f)
    return(send_from_directory(directory = directory_name, filename = "./" + file_name, as_attachment = True))
Example no. 41
def main():
    # Create file with $1_$2.log
    # Create file with $1_$2_long.log
    # Start bot
    global log, log_long, log_directory
    create_folder(log_directory)
    log = create_file(log_directory + username + "_" + stream + ".log")
    log_long = create_file(log_directory + username + "_" + stream +
                           "_long.log")
    bot()
Example no. 42
File: screen.py Project: zdzhjx/see
    def screenshot(self, event):
        self.assert_context_state()

        folder_path = self.configuration['results_folder']
        screenshot_path = os.path.join(folder_path,
                                       "%s_%s.ppm" % (self.identifier, event))
        create_folder(folder_path)

        with open(screenshot_path, 'wb') as screenshot_file:
            screenshot_stream = screenshot(self.context)
            screenshot_file.write(screenshot_stream)
Example no. 43
def run():
    config = utils.load_cfg('conf.cfg')
    template_path = config.get('image', 'filename')

    markers = {'school': config.getint('markers', 'school'),
               'year': config.getint('markers', 'year'),
               'level': config.getint('markers', 'level'),
               'award': config.getint('markers', 'award'),
               'recipient': config.getint('markers', 'recipient')}

    font = {'color': config.get('font', 'color'),
            'name': config.get('font', 'name'),
            'size': config.getint('font', 'size')}

    images_per_pdf = config.getint('pdf', 'images_per_pdf')
    if images_per_pdf > 6:
        # 6 is the maximum allowed number of images per pdf
        exit()

    csv_file = config.get('csv', 'filename')
    cc = CsvReader(csv_file)
    csv_data = cc.read()

    school = 'John Scottus School'
    year = '2016'

    image_folder = utils.create_folder('images')
    pdf_folder = utils.create_folder('pdfs')

    count = 0
    images = []

    draw_tool = ImageWriter(font)

    for row in csv_data:
        im = draw_tool.open_image(template_path)
        im = draw_tool.write_text(im, markers['school'], school)
        im = draw_tool.write_text(im, markers['year'], year)
        im = draw_tool.write_text(im, markers['level'], row['Level'])
        im = draw_tool.write_text(im, markers['award'], row['Award'])
        im = draw_tool.write_text(im, markers['recipient'], row['Recipient'])
        im_path = image_folder + '/' + 'image_' + utils.timestamp() + '.png'
        draw_tool.save_image(im, im_path)
        count += 1
        images.append(im_path)

        if count % images_per_pdf == 0:
            create_pdf(pdf_folder, images)
            count = 0
            images = []

    create_pdf(pdf_folder, images)
Example no. 44
File: memory.py Project: zdzhjx/see
    def memory_snapshot(self, event):
        folder_path = self.configuration['results_folder']
        file_name = "%s_%s.bin%s" % (
            event,
            datetime.now().replace(microsecond=0).time().strftime("%H%M%S"),
            self.configuration.get('compress_snapshots', False)
            and '.gz' or '')
        snapshot_path = os.path.join(folder_path, file_name)

        create_folder(folder_path)
        self.dump_memory(snapshot_path)

        return snapshot_path
Example no. 45
    def start_trace_handler(self, event):
        folder_path = self.configuration['results_folder']

        self.logger.debug("Event %s: starting network tracing.", event)

        create_folder(folder_path)
        self.pcap_path = os.path.join(folder_path, "%s.pcap" % self.identifier)
        self.tracer_process = launch_process(
            TSHARK, '-w', self.pcap_path,
            '-i', self.context.network.bridgeName())
        self.context.trigger("network_tracing_started", path=self.pcap_path)

        self.logger.info("Network tracing started.")
Example no. 46
def snapshot_to_checkpoint(volume, snapshot, folder_path):
    """Turns a QEMU internal snapshot into a QCOW file."""
    create_folder(folder_path)

    name = snapshot.getName()
    path = os.path.join(folder_path, '%s.qcow2' % name)

    process = launch_process(QEMU_IMG, "convert", "-f", "qcow2", "-o",
                             "backing_file=%s" % volume_backing_path(volume),
                             "-O", "qcow2", "-s", name,
                             volume_path(volume), path)
    collect_process_output(process)

    return path
Example no. 47
def consumer(ip=IP_BEANSTALK, port=PORT_BEANSTALK, task=COMPETITION_GROUP_NAME, n_jobs=1):
    global WORKSPACE, TRAIN_FILE, TEST_FILE
    df_train = pd.read_csv(TRAIN_FILE)
    df_test = pd.read_csv(TEST_FILE)

    talk = beanstalkc.Connection(host=ip, port=port)
    talk.watch(task)

    hostname = socket.gethostname()

    queue = Queue.Queue()
    for n in range(0, n_jobs):
       thread = SplitThread(kwargs={"df_train": df_train, "df_test": df_test, "queue": queue})
       thread.setDaemon(True)
       thread.start()

    while True:
        job = talk.reserve(timeout=TIMEOUT_BEANSTALK)
        if job:
            o = json.loads(job.body)
            filetype, output_filepaths, column, values = o["filetype"], o["output_filepath"], o["column"], o["value"]

            output_folder = None
            for output_filepath, value in zip(output_filepaths, values):
                output_folder = os.path.dirname(output_filepath)

                create_folder(output_filepath)
                queue.put((output_filepath, filetype, column, value))

            queue.join()

            if hostname != ip:
                p = subprocess.Popen(["scp", "{}/*.csv".format(output_folder), "RungChiChen@{}:{}".format(IP_BEANSTALK, output_folder)])
                pid, sts = os.waitpid(p.pid, 0)
                log("Transfer {} successfully({})".format(output_filepath, sts), INFO)

                if sts == 0:
                    for f in os.listdir(output_folder):
                        if f.endswith(".csv"):
                            filepath = os.path.join(output_folder, f)

                            os.remove(filepath)
                            log("Remove {}".format(filepath), INFO)

            job.delete()

    queue.join()
    talk.close()
Example no. 48
def _pos_split_data(df, x, range_y, window_size_x, window_size_y, output_folder):
    start_x, end_x = x, min(x+window_size_x, 11)
    c2 = (df["x"].values >= start_x) & (df["x"].values < end_x)

    for y in range_y:
        start_y, end_y = y, min(y+window_size_y, 11)
        c3 = (df["y"].values >= start_y) & (df["y"].values < end_y)

        filepath_output = os.path.join(output_folder, "windown_size={},{}".format(window_size_x, window_size_y), "{}_{}.csv".format(start_x, start_y))
        if not os.path.exists(filepath_output):
            create_folder(filepath_output)

            final_df = df[c2 & c3]
            if final_df.shape[0] > 0:
                final_df.to_csv(filepath_output, index=False)
                log("Save file in {}".format(filepath_output), INFO)
Example no. 49
def download_file(url, name, dest=".", number=1):
    print "  {0}) In: {1}".format(number, url)
    filepath = os.path.join(create_folder(dest), name)
    try:
        urllib.urlretrieve(url, filepath)
    except:
        print "  !!!! FAIL:", url
    print "  Out: {}\n".format(filepath)
Example no. 50
def reduce_sort_timings(folder_name, array_lens, file_name_filters=[], reduce_func=sort_rate):
    """Reduces sort timings and outputs them to files."""

    for distribution in os.listdir(folder_name):
        folder_dist_input = "%s%s/" % (folder_name, distribution)
        folder_dist_output = "%s%s/" % (const.FOLDER_SORT_REDUCTION, distribution)
        create_folder(folder_dist_output)

        # Creates output file
        file_name_output = "%s%s%s" % (
            folder_dist_output, '_'.join(file_name_filters), const.FILE_EXTENSION
        )
        file_output = open(file_name_output, "w+")

        # Saves header to output file
        header = "%s%s" % (const.SEPARATOR, lengths_to_log(array_lens))
        print(header, file=file_output)

        for file_name_sort in os.listdir(folder_dist_input):
            if not verify_file_name(file_name_sort, file_name_filters, const.FILE_EXTENSION):
                continue

            # Reduces sort timings
            with open("%s%s" % (folder_dist_input, file_name_sort), "r") as file_sort:
                content = file_sort.read()
                lines = content.split(const.FILE_NEW_LINE_CHAR)[:-1]
                timings = [[float(t) for t in l.split(const.SEPARATOR)] for l in lines]
                timings_reduced = [reduce_func(t, l) for t, l in zip(timings, array_lens)]

            # Generates sort name
            sort_name = str(file_name_sort)
            for file_filter in file_name_filters:
                sort_name = sort_name.replace(file_filter, "")

            # Outputs sort timings
            sort_name = sort_name[:-len(const.FILE_EXTENSION)]
            sort_name = " ".join(s for s in sort_name.split("_") if s)
            timings_output = const.SEPARATOR.join(str(t).replace(".", ",") for t in timings_reduced)
            output = "%s%s%s" % (sort_name, const.SEPARATOR, timings_output)

            print(output, file=file_output)

        file_output.close()
Example no. 51
def median_solution(week, output_filepath, filepath, solution):
    log("Store the solution in {}".format(output_filepath), INFO)
    create_folder(output_filepath)

    ts = time.time()
    with open(output_filepath, "wb") as OUTPUT:
        log("Read {}".format(filepath), INFO)
        header = True

        if week < 10:
            OUTPUT.write("Semana,Agencia_ID,Canal_ID,Ruta_SAK,Cliente_ID,Producto_ID,MEDIAN_Demanda_uni_equil\n")

            with open(filepath) as INPUT:
                for line in INPUT:
                    if header:
                        header = False
                    else:
                        w, agency_id, channel_id, route_id, client_id, product_id, _, _, _, _, _ = line.strip().split(",")

                        w = int(w)
                        if w == week:
                            prediction_median = get_median(solution[0], solution[1], {COLUMN_AGENCY: agency_id, COLUMN_PRODUCT: product_id, COLUMN_CLIENT: client_id})

                            OUTPUT.write("{}\n".format(",".join([str(w), agency_id, channel_id, route_id, client_id, product_id, str(prediction_median)])))
                        else:
                            pass
        else:
            OUTPUT.write("id,Demanda_uni_equil\n")

            with open(filepath, "rb") as INPUT:
                for line in INPUT:
                    if header:
                        header = False
                    else:
                        row_id, w, agency_id, channel_id, route_id, client_id, product_id = line.strip().split(",")
                        prediction_median = get_median(solution[0], solution[1], {COLUMN_AGENCY: agency_id, COLUMN_PRODUCT: product_id, COLUMN_CLIENT: client_id})

                        OUTPUT.write("{},{}\n".format(row_id, prediction_median))

    te = time.time()
    log("Cost {:4f} secends to generate the solution".format(te-ts), INFO)
Esempio n. 52
0
def _complex_split_data(df, time_column, time_id, range_x, range_y, size_x, size_y, output_folder):
    c1 = (df[time_column] == time_id)

    folder = os.path.join(output_folder, "{}={}".format(time_column, time_id))
    for window_size_x, window_size_y in zip(size_x, size_y):
        for x in range_x:
            start_x, end_x = x, min(11, x+window_size_x)
            c2 = (df["x"].values >= start_x) & (df["x"].values < end_x)

            for y in range_y:
                start_y, end_y = y, min(11, y+window_size_y)
                c3 = (df["y"].values >= start_y) & (df["y"].values < end_y)

                filepath_output = os.path.join(folder, "windown_size={},{}".format(window_size_x, window_size_y), "{}_{}.csv".format(start_x, start_y))
                if not os.path.exists(filepath_output):
                    create_folder(filepath_output)

                    df[c1 & c2 & c3].to_csv(filepath_output, index=False)
                    log("Save file in {}".format(filepath_output), INFO)
                else:
                    log("Skip {}".format(filepath_output), INFO)
Esempio n. 53
0
def _place_info(df_all, place_id, output_folder):
    filepath_output = os.path.join(output_folder, "{}.csv".format(place_id))

    if os.path.exists(filepath_output):
        log("Skip {}".format(filepath_output), INFO)
    else:
        df = df_all[df_all["place_id"] == place_id]

        results = {"place_id": [place_id]}
        results["left_top"] = ["{},{}".format(df["x"].min(), df["y"].max())]
        results["right_bottom"] = ["{},{}".format(df["x"].max(), df["y"].min())]
        results["std_x"] = [df["x"].std()]
        results["std_y"] = [df["y"].std()]
        results["count"] = [df.shape[0]]
        results["centroid"] = ["{},{}".format(df["x"].median(), df["y"].median())]

        create_folder(filepath_output)

        pd.DataFrame.from_dict(results, orient="index").T.to_csv(filepath_output, index=False)
        log("Save file in {}".format(filepath_output), INFO)
Esempio n. 54
0
def consumer(ip=IP_BEANSTALK, port=PORT_BEANSTALK, task=COMPETITION_GROUP_NAME, n_jobs=1):
    df_train = pd.read_csv(TRAIN_FILE)
    log("Load {} completely".format(TRAIN_FILE))

    df_test = pd.read_csv(TEST_FILE)
    log("Load {} completely".format(TEST_FILE))

    week = 10
    median_route_solution = (load_median_solution(week-1, "route_id", ROUTE_GROUPS), ROUTE_GROUPS)
    median_agency_solution = (load_median_solution(week-1, "agency_id", AGENCY_GROUPS), AGENCY_GROUPS)

    talk = beanstalkc.Connection(host=ip, port=port)
    talk.watch(task)

    for n in range(0, n_jobs):
        thread = SplitThread(kwargs={"df_train": df_train, "df_test": df_test, "median_route_solution": median_route_solution, "median_agency_solution": median_agency_solution, "queue": queue})
        thread.setDaemon(True)
        thread.start()

    while True:
        job = talk.reserve(timeout=TIMEOUT_BEANSTALK)
        if job:
            o = json.loads(job.body)
            filetype, output_filepaths, column, values = o["filetype"], o["output_filepath"], o["column"], o["value"]

            output_folder = None
            for output_filepath, value in zip(output_filepaths, values):
                output_folder = os.path.dirname(output_filepath)

                create_folder(output_filepath)
                queue.put((output_filepath, filetype, column, value))

            queue.join()

            job.delete()

    queue.join()
    talk.close()
Esempio n. 55
0
def hierarchical_folder_structure(column, filetype):
    prefixs = set()
    folder = os.path.join(SPLIT_PATH, COLUMNS[column], filetype.lower())

    if not os.path.isdir(folder):
        log("{} is not a folder".format(folder), INFO)
        return

    timestamp_start = time.time()
    for filepath in glob.iglob("{}/*.csv".format(folder)):
        filename = os.path.basename(filepath)

        prefix = filename[0:3]
        prefixs.add(prefix)

        new_folder = os.path.join(folder, prefix)
        new_filepath = os.path.join(new_folder, filename)

        create_folder(new_filepath)
        os.rename(filepath, new_filepath)
        log("Move {} to {}".format(filepath, new_filepath), INFO)

    timestamp_end = time.time()
    log("Cost {:4f} secends to move files to the sub-folders".format(timestamp_end-timestamp_start), INFO)

    hostname = socket.gethostname()
    if hostname != IP_BEANSTALK:
        timestamp_start = time.time()
        for prefix in prefixs:
            filepath = os.path.join(folder, prefix)

            p = subprocess.Popen(["scp", "-r", filepath, "RungChiChen@{}:\"{}\"".format(IP_BEANSTALK, folder.replace(" ", "\\\\ "))])
            pid, sts = os.waitpid(p.pid, 0)
            log("Transfer {} successfully({})".format(filepath, sts), INFO)

        timestamp_end = time.time()
        log("Cost {:.4f} seconds to copy files to the {}".format(timestamp_end-timestamp_start, IP_BEANSTALK), INFO)
Esempio n. 56
0
def cache_median(filepath, filetype, week=9, output_folder=MEDIAN_SOLUTION_PATH):
    df = pd.read_csv(filepath)

    shape = df.shape
    df = df[df[COLUMN_WEEK] <= week]
    new_shape = df.shape
    log("After filtering, the shape is modified from {} to {}".format(shape, new_shape), INFO)

    drop_columns = [COLUMN_WEEK, 'Venta_uni_hoy', 'Venta_hoy', 'Dev_uni_proxima', 'Dev_proxima']
    df.drop(drop_columns, inplace=True, axis=1)

    target = {COLUMN_PREDICTION: np.median}

    groups = None
    if filetype == MONGODB_COLUMNS[COLUMN_ROUTE]:
        groups = ROUTE_GROUPS
    elif filetype == MONGODB_COLUMNS[COLUMN_AGENCY]:
        groups = AGENCY_GROUPS

    if groups is None:
        log("Found unknown filetype - {}".format(filetype), ERROR)
        return

    for group in groups:
        median = df.groupby(group).agg(target).to_dict()

        solution = {}
        for key, value in median[COLUMN_PREDICTION].items():
            if isinstance(key, np.int64):
                solution[str(key)] = value
            else:
                solution["_".join([str(s) for s in key])] = value

        log("There are {} records in median_solution".format(len(solution)), INFO)
        output_filepath = os.path.join(output_folder, filetype, "week={}".format(week), "{}.json".format("_".join([str(s) for s in group])))
        create_folder(output_filepath)
        with open(output_filepath, "wb") as OUTPUT:
            json.dump(solution, OUTPUT)

            log("Write median solution to {}".format(output_filepath), INFO)
Esempio n. 57
0
    def __init__(self):
        self.options = Options().args

        self.tester = None

        mode_count = 0
        for mode in PerformanceTestRunner.modes:
            if mode in sys.argv:
                mode_count += 1

        if mode_count == 0:
            utils.print_error(
                'Cannot determine any kind of request for testing')
            utils.print_error(
                'Maybe you are missing arguments "-a" or "-b" or "-t" or "-l"')
            sys.exit(1)

        if mode_count > 1:
            utils.force_print_error_to_console(
                '"-a" and "-g" and "-t" and "-l" '
                'cannot exist at the same time\n')
            sys.exit(1)

        self.list_tester = list()

        self.start_time = self.finish_time = 0
        self.lowest = self.fastest = 0
        self.passed_req = self.failed_req = 0
        self.result_path = os.path.join(os.path.dirname(__file__), 'results')
        utils.create_folder(self.result_path)
        log_path = os.path.join(os.path.dirname(__file__), 'logs')
        utils.create_folder(log_path)

        now = time.strftime("%d-%m-%Y_%H-%M-%S")
        self.result_path = os.path.join(self.result_path,
                                        'result_{}.txt'.format(now))

        log_path = os.path.join(
            log_path, self.create_log_file_name())
        requests_sender.RequestsSender.init_log_file(log_path)
        utils.create_folder(self.options.info_dir)
Esempio n. 58
0
    def __init__(self, req_info_file_path=None, log=False):
        self.log = log
        self.req_info_file_path = req_info_file_path
        self.path = os.path.join(os.path.dirname(__file__), 'temp')
        utils.create_folder(self.path)