Example 1
 def test_existence_checking(self):
     """Tests wheather not existing datasets are marked as not existing
     and wheather proper month parameter wouldn't be consider inproper
     """
     params = [f'2010-{num:0>2}' for num in range(1, 13)]  # all months of 2010
     for month in params:
         with self.assertRaises(ValueError) as cm:  # ValueError should be raised with proper message
             download_data(month)
         self.assertEqual(f'Dataset from {month} cannot be found on lichess.org', cm.exception.args[0], msg=month)
Example 2
 def test_parameter_checking(self):
     """Test if wrong parameters are recognised as wrong
     """
     params = ['12-2015', '12-15', '2015_12', '2015.12', '12.2015', '12/2015', '12/15', '2015-00', '2015-13']
     # list of examples of possible wrong month parameters
     for month in params:
         with self.assertRaises(ValueError) as cm:  # ValueError should be raised with proper message
             download_data(month)
         self.assertEqual('Month parameter should be in form `yyyy-mm`', cm.exception.args[0], msg=month)
Example 3
 def test_downloading(self):
     """Tests wheather file was saved to returned location
     """
     month = '2013-01'  # smallest of available datasets
     path = download_data(month)
     self.assertTrue(os.path.isfile(path), msg='File on returned location does not exist')
     os.remove(path)
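Read together, the three tests above pin down the contract of download_data: a month not in `yyyy-mm` form (including out-of-range months such as `2015-00`) must raise ValueError with the message 'Month parameter should be in form `yyyy-mm`', a well-formed month that lichess.org does not host must raise 'Dataset from {month} cannot be found on lichess.org', and a successful call must return the path of the saved file. A minimal sketch that would satisfy those tests follows; the lichess database URL layout and the DATASETS target folder are assumptions, since the real implementation is not shown in these excerpts.

import os
import re

import requests

DATASETS = 'datasets'  # assumed target folder; the real constant lives elsewhere

def download_data(month):
    """Download the standard-rated game dump for `month` and return its local path."""
    if not re.fullmatch(r'\d{4}-(0[1-9]|1[0-2])', month):
        raise ValueError('Month parameter should be in form `yyyy-mm`')
    # the URL scheme of the lichess database is an assumption here
    url = f'https://database.lichess.org/standard/lichess_db_standard_rated_{month}.pgn.bz2'
    response = requests.get(url, stream=True)
    if response.status_code == 404:
        raise ValueError(f'Dataset from {month} cannot be found on lichess.org')
    response.raise_for_status()
    path = os.path.join(DATASETS, f'{month}.pgn.bz2')
    with open(path, 'wb') as f:
        for chunk in response.iter_content(chunk_size=1 << 20):
            f.write(chunk)
    return path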
def main():
    if not os.path.isdir(DATASETS):
        raise FileNotFoundError('Folder given to save datasets does not exist.')
    datasets = sys.argv[1:]  # use the dataset months given by the user
    for month in set(datasets):
        filename = download_data(month)
        out_path = os.path.join(DATASETS, filename)
        preprocess_data(filename, out_path)
    print(f'Successfully downloaded and preprocessed {len(set(datasets))} file(s).')
    list_of_candidates = save_data.load_saved_data(working_directory,
                                                   "list_of_candidates.data")
    edge_cases = save_data.load_saved_data(working_directory,
                                           "edge_cases.data")
    candidate_to_committee_map = save_data.load_saved_data(
        working_directory, "candidate_to_committee_map.data")
    committee_to_candidate_map = save_data.load_saved_data(
        working_directory, "committee_to_candidate_map.data")
    list_of_committees = save_data.load_saved_data(working_directory,
                                                   "list_of_committees.data")

elif not saved_data_exists:
    print("Did not detect saved data, reloading data")
    # Check for necessary files and download them
    print("Checking for Federal Election Commission files")
    downloader.download_data(working_directory)
    print("Reading election files and creating data structures")
    # Create list of election campaign objects
    list_of_campaigns = campaign.read_election_data(working_directory)
    list_of_candidates = candidate.read_candidate_data(working_directory)
    list_of_committees = committee.read_committee_data(working_directory)
    # Cross-link the data
    print("Crosslinking all the data files together")
    list_of_campaigns, list_of_candidates, edge_cases, candidate_to_committee_map, committee_to_candidate_map = \
        data_crossmap.link_campaigns_candidates_and_committees_together(list_of_campaigns, list_of_candidates,
                                                                        list_of_committees)
    print("Saving the data structures")
    all_data = [
        list_of_campaigns, list_of_candidates, edge_cases,
        candidate_to_committee_map, committee_to_candidate_map,
        list_of_committees
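The excerpt above only shows the loading side of the persistence layer; how save_data stores the structures is not visible. A plausible sketch, assuming plain pickle files named after each data structure inside working_directory (load_saved_data mirrors the calls above, while the name of the save-side helper is a guess):

import os
import pickle

def load_saved_data(working_directory, filename):
    """Unpickle and return the object stored at <working_directory>/<filename>."""
    with open(os.path.join(working_directory, filename), 'rb') as f:
        return pickle.load(f)

def save_saved_data(working_directory, filename, data):  # save-side name is assumed
    """Pickle `data` to <working_directory>/<filename>."""
    with open(os.path.join(working_directory, filename), 'wb') as f:
        pickle.dump(data, f)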
Example 6
def main():

    # reading the command line arguments
    parser = argparse.ArgumentParser(
        description='Read in file paths and other parameters.')
    parser.add_argument('--technique',
                        choices=['nmf', 'unet'],
                        help='technique to use to segment neurons',
                        default='nmf',
                        type=str)
    parser.add_argument('--k',
                        help='number of components to estimate per block',
                        default="full",
                        type=str)
    parser.add_argument('--max_size',
                        help='maximum size of each region',
                        default="full",
                        type=str)
    parser.add_argument('--min_size',
                        help='minimum size for each region',
                        default=20,
                        type=int)
    parser.add_argument('--max_iter',
                        help='maximum number of algorithm iterations',
                        default=20,
                        type=int)
    parser.add_argument('--percentile',
                        help='percentile value for thresholding (higher means more thresholding)',
                        default=95,
                        type=int)
    parser.add_argument('--overlap',
                        help='overlap value for determining whether to merge (higher means fewer merges)',
                        default=0.1,
                        type=float)

    args = parser.parse_args()

    #downloading the data as zip files
    dld.download_data()

    #calling extractor to extract the downloaded files
    zip.extract_zips()

    technique = args.technique
    k_value = args.k
    max_size_value = args.max_size
    min_size_value = args.min_size
    max_iter_value = args.max_iter
    percentile_value = args.percentile
    overlap_value = args.overlap

    if technique == 'nmf':
        nmf.NMF_experiments(k=k_value,
                            max_size=max_size_value,
                            min_size=min_size_value,
                            percentile=percentile_value,
                            max_iter=max_iter_value,
                            overlap=overlap_value)
    elif technique == "unet":
        print('Warning: This code is a work in progress')
        train_image_path, test_image_path, train_region_path = un.get_train_test_region_paths()
        train_images_list = un.get_image_list(train_image_path)
        un.create_nparray(train_images_list, "train.npy")
        test_images_list = un.get_image_list(test_image_path)
        un.create_nparray(test_images_list, "test.npy")
        mask_list = un.region_to_mask(train_region_path)
        un.train_model()
        result = un.predict()
        masks = un.prepare_masks(result)
        un.masks_to_json(masks)
        un.remove_npy()
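The U-Net branch above leans on several helpers from `un` that are not included in the excerpt. As one illustration, a create_nparray-style helper could plausibly look like the sketch below, assuming get_image_list returns paths to equally sized image files; this is a guess at the helper's behaviour, not the project's actual code.

import numpy as np
from PIL import Image

def create_nparray(image_paths, out_file):
    # load every image, stack into one array, and save it as a .npy file
    arrays = [np.asarray(Image.open(path)) for path in image_paths]
    np.save(out_file, np.stack(arrays))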
Example 7
import argparse

import optical_flow as opt
import prepare_unet_data as unetdata

if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument('--mode',type=str,help='train or test')
    parser.add_argument('--preproc_type',type=str,help='full or mean or normalize')
    parser.add_argument('--optical_flow',type=str,help='full or step_wise or first_two')
    parser.add_argument('--image_processing',type=str,help='none or sobel or roberts or prewitt or kirsch')
    parser.add_argument('--unet',type=str,help='T or F')
    args = parser.parse_args()


    #downloading the data as tar files
    dld.download_data()

    #calling extractor to extract the downloaded files
    ext.extract_tars()

    if args.mode == 'train':
        #reading the names of the files in a list
        file_list = read.read_filename('../dataset/train.txt')

        #reading the whole data as list of list containing all frames for each file
        whole_data = read.load_whole_data('../dataset/data', file_list)

        #reading the masks in the list
        mask_list = read.load_mask('../dataset/masks', file_list)

    if args.mode == 'test':
Example 8
#         except TimeoutException:
#             continue
#         item_data = MongoRepo.create(item_data)
#         download_data(item_data)

start_time = datetime.datetime.now()
logger.info('Script started at {}'.format(start_time))

with Hydralians() as hydralians:
    category_hrefs = hydralians.get_category_hrefs()
    item_hrefs = hydralians.get_item_hrefs(category_hrefs)
    logger.info('Items to parse: {}'.format(len(item_hrefs)))
    # for item_href in item_hrefs:
    #     MongoRepo.create_product_url({'url': item_href})
    items_bar = tqdm(total=len(item_hrefs))
    items_bar.set_description(desc='Items')
    for item_href in item_hrefs:
        items_bar.update()
        try:
            item_data = hydralians.get_item_data(item_href)
        except Exception as ex:
            logger.info('Problem with {}'.format(item_href))
            logger.error(str(ex))
            continue
        item_data = MongoRepo.create(item_data)
        download_data(item_data)
    items_bar.close()

end_time = datetime.datetime.now()
logger.info('Script ended at {}'.format(end_time))
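MongoRepo is used in the loop above but never defined in this excerpt. A plausible sketch of its create method, assuming a MongoDB backend via pymongo; the connection string, database, and collection names are placeholders rather than values taken from the project.

from pymongo import MongoClient

class MongoRepo:
    # placeholder connection, database, and collection names
    _collection = MongoClient('mongodb://localhost:27017')['hydralians']['items']

    @classmethod
    def create(cls, item_data):
        # insert the scraped item and return it with the generated _id attached
        item_data['_id'] = cls._collection.insert_one(item_data).inserted_id
        return item_data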