def __init__(self, **kwargs):
    """Populate ``self.fields`` from raw CSV text or from ready-made fields.

    Keyword Args:
        text: CSV text. When truthy it is handed to ``parse_csv`` together
            with ``col_names`` and the result becomes ``self.fields``.
        col_names: Forwarded to ``parse_csv`` as its second argument
            (``None`` when omitted). Only consulted when ``text`` is used.
        fields: Value stored directly in ``self.fields``. Only consulted
            when ``text`` is missing or falsy.

    Raises:
        ValueError: If neither ``text`` nor ``fields`` is supplied with a
            truthy value. ``ValueError`` derives from ``Exception``, so
            callers that catch ``Exception`` keep working.
    """
    text = kwargs.get('text')
    fields = kwargs.get('fields')

    # ``text`` takes precedence over ``fields`` when both are given.
    if text:
        self.fields = parse_csv(text, kwargs.get('col_names'))
    elif fields:
        self.fields = fields
    else:
        raise ValueError(
            "either a non-empty 'text' or a non-empty 'fields' keyword "
            "argument is required"
        )
def executor():
    """Run the import pipeline: read input, parse the CSV, build the structure.

    Steps, in order:
      1. ``parse_input()`` supplies the arguments.
      2. A ``DbHandler`` is created and passed, with the arguments, to
         ``service_args_handler``.
      3. ``source_path_handler`` derives the CSV source from the arguments.
      4. The source is parsed with ``parse_csv`` and the result is handed to
         ``create_struct`` together with the handler and the arguments.
    """
    cli_args = parse_input()
    db_tables = DbHandler()

    # Service-level arguments are processed before any data is read.
    service_args_handler(cli_args, db_tables)

    csv_source = source_path_handler(cli_args)
    rows = parse_csv(csv_source)
    create_struct(rows, db_tables, cli_args)
def parse_csv_or_die(args, parse_row, errors=None, skip=1, **kwargs):
    """Parse the CSV file or print the errors and exit.

    Args:
        args: Mapping holding the path of the CSV file under ``"<file>"``.
        parse_row: Row callback forwarded to ``parser.parse_csv``.
        errors: Optional list of already-collected error messages. New
            messages are appended to it in place. When omitted, a fresh list
            is created for this call.
        skip: Forwarded to ``parser.parse_csv`` as ``skip``.
        **kwargs: Forwarded unchanged to ``parser.parse_csv``.

    Returns:
        Whatever ``parser.parse_csv`` returns for the opened file.

    Raises:
        SystemExit: With exit status 1, after printing every collected
            message, when ``errors`` is non-empty after the parse attempt
            (including messages the caller passed in).
    """
    # A fresh list per call: a shared ``errors=[]`` default would keep the
    # messages of earlier calls and report them again on the next one.
    if errors is None:
        errors = []

    try:
        with open(args["<file>"], "r") as f:
            people = parser.parse_csv(f, parse_row, skip=skip, **kwargs)
    except KeyError:
        errors.append("<file> is required")
    except FileNotFoundError:
        errors.append(f"'{args['<file>']}' doesn't exist")
    except parser.CSVException as e:
        errors.extend(e.errors)

    if errors:
        for message in errors:
            print(message)
        sys.exit(1)

    return people
def bvgFeatureExtractor(bvg_file, index=None, intersections=False,
                        fuzzy_grid=False, vein_angles=False, avg_angles=False,
                        rishi_angles=False, up_down=False,
                        midpoint_veins=False, print_features=False,
                        print_advanced_features=False):
    """Draw the veins of a BVG file and run the requested feature extractors.

    The file is parsed with ``parser.parse_csv`` and its dimensions are read
    with ``parser.get_dimensions``. The veins are drawn onto a blank
    3-channel ``uint8`` image, then every extractor whose flag is truthy runs
    in a fixed order. Results are not returned: the advanced extractors write
    into the ``advanced_vein_features`` mapping defined outside this function.
    NOTE(review): the basic extractors presumably fill ``vein_features`` as a
    side effect -- confirm against their definitions.

    Args:
        bvg_file: Input handed to the parser helpers.
        index: When not ``None``, used to build the integer ``22<index>``.
        intersections: Run the intersection and intersection-distance steps.
        fuzzy_grid: Run the grid extraction (with ``drawLines=True``).
        vein_angles: Run ``extractVeinAngles``.
        avg_angles: Store ``avgVeinAngles`` of ``vein_features['veins']``.
        rishi_angles: Store ``completeVeinAngles`` of the parsed veins.
        up_down: Store ``upDowns`` of ``vein_features['fuzzy_grid_heatmap']``.
        midpoint_veins: Store ``midpointVeins`` of the parsed veins.
        print_features: Pretty-print ``vein_features``.
        print_advanced_features: Print each advanced feature on one line.
    """
    vein_list = parser.parse_csv(bvg_file)
    dimensions = parser.get_dimensions(bvg_file)

    # dimensions[1] is used for the row count of the image, dimensions[0]
    # for the column count.
    cell_scale = dimensions[1] / fuzz_cell_dimension
    canvas = np.zeros((dimensions[1], dimensions[0], 3), np.uint8)
    drawVeins(vein_list, canvas)

    # --- basic features -------------------------------------------------
    if intersections:
        extractIntersections(vein_list, canvas)
        extractIntersectionDistances()

    if fuzzy_grid:
        extractGrid(vein_list, canvas, cell_scale, drawLines=True)

    if vein_angles:
        extractVeinAngles(vein_list)

    # --- advanced features ----------------------------------------------
    if avg_angles:
        advanced_vein_features['avg_angles'] = avgVeinAngles(
            vein_features['veins'], buckets=True)

    if rishi_angles:
        advanced_vein_features['rishi_angles'] = completeVeinAngles(
            vein_list, buckets=True)

    if up_down:
        advanced_vein_features['up_down'] = upDowns(
            vein_features['fuzzy_grid_heatmap'])

    if midpoint_veins:
        advanced_vein_features['midpoint_veins'] = midpointVeins(
            vein_list, buckets=True)

    # --- reporting ------------------------------------------------------
    if print_features:
        print('PRINTING BASIC VEIN FEATURES:\n=====================\n')
        pprint(vein_features)

    if print_advanced_features:
        print('PRINTING ADVANCED VEIN FEATURES:\n=====================\n')
        for feature_name, values in advanced_vein_features.iteritems():
            rendered = ', '.join(map(str, values))
            print('Key: %s : %s' % (feature_name, rendered))

    if index is not None:
        # NOTE(review): ``grid`` is not used in the visible part of this
        # function; it is presumably consumed by code beyond this view.
        grid = int('22' + str(index))
def test_parse_csv(self):
    """Parsing ``small_csv.txt`` must leave exactly one item with the known id."""
    parse_csv('small_csv.txt')

    # Look the record up in the ``csv_items`` collection of the test database.
    query = {'id': 402983319863}
    matches = self.db.csv_items.find(query).count()

    assert matches == 1
def _run_page_rank(nodes, out_degrees, in_degrees):
    """Load the graph into ``pagerank``, rank it and print the timing stats."""
    # Sets up page rank structures
    pagerank.set_up(nodes, out_degrees, in_degrees)

    print('Page Ranking...')
    start = time.time()
    num_iters = pagerank.page_rank(0)  # Stores # of page rank iterations
    end = time.time()

    # Statistics
    print('Page Rank Time: ' + str(end - start) + ' seconds')
    print('Page Rank Iterations: ' + str(num_iters))


def main():
    '''
    The menu shown at the beginning of the Page Rank program.

    It asks whether the file is a csv or snap file, then the file name.

    Ex:
        > python lab3.py [-w]
        CSC 466: Lab 3 - Page Rank & Link Analysis
        Parse:
        1) csv
        2) snap
        (User enters 1 or 2)
        File name: (User enters file name here)

    There is an optional flag '-w' that is used for the Football csv; it
    selects ``parser.parse_weighted_csv`` instead of ``parser.parse_csv`` and
    only applies to the csv choice.

    According to the original notes, the program outputs every 1000 lines
    (to ensure that it's parsing) and, at the end of the page rank
    algorithm, prints the top 20 nodes and how long the ranking took.

    Note: -w doesn't quite work at the moment. Please ignore it for now.

    Any menu answer other than '1' or '2' prints an error and returns
    without parsing anything.
    '''
    # '-w' is only recognised as the first command-line argument.
    is_weighted = len(sys.argv) > 1 and sys.argv[1] == '-w'

    # Menu (``raw_input`` is the Python 2 prompt function this script uses).
    print('CSC 466: Lab 3 - PageRank & Link Analysis')
    parse_menu = raw_input('Parse:\n' +
                           '1) csv\n' +
                           '2) snap\n')
    file_name = raw_input('File name: ')

    # The algorithm is the same for every format, only the parser differs.
    # Each parser returns a tuple (list, dictionary, dictionary).
    if parse_menu == '1':
        if is_weighted:
            parse = parser.parse_weighted_csv
        else:
            parse = parser.parse_csv
    elif parse_menu == '2':
        parse = parser.parse_snap
    else:
        # Wrong input
        print('Invalid input - exiting')
        return

    print('Parsing/Creating Graph...')
    start = time.time()  # Tracking time
    (nodes, out_degrees, in_degrees) = parse(file_name)
    end = time.time()
    print('Parse/Graph Set-up Time: ' + str(end - start) + ' seconds')

    _run_page_rank(nodes, out_degrees, in_degrees)
legend_html = ''' <div style="position: fixed; bottom: 50px; left: 50px; border:2px solid grey; z-index:9999; font-size:14px; background:white; "> The Population Markers <br> < 10.000.000 <i class="fa fa-circle fa-2x" style="color:green"></i><br> < 20.000.000 <i class="fa fa-circle fa-2x" style="color:purple"></i><br> > 10.000.000 <i class="fa fa-circle fa-2x" style="color:orange"></i> </div> ''' map.add_child(mov_density) map.add_child(movies_loc) map.add_child(population) map.get_root().html.add_child(folium.Element(legend_html)) map.add_child(folium.LayerControl()) map.save('{}.html'.format(year)) if __name__ == "__main__": year = get_year() locations = parse_csv("locations.csv") places = to_dict(locations, year) usr_loc = get_user_location() mov_locations = to_dict(locations, year) filtered = to_sorted(locations, year) map_creator(usr_loc, mov_locations, filtered, year)
def main():
    '''
    Interactive entry point of the Page Rank program.

    It asks whether the file is a csv or snap file, then the file name, then
    which external implementation to run.

    Ex:
        > python lab3.py [-w]
        CSC 466: Lab 3 - Page Rank & Link Analysis
        Parse:
        1) csv
        2) snap
        (User enters 1 or 2)
        File name: (User enters file name here)
        Xeon Phi, Cuda or Both? (x/c/b): (User enters x, c or b)

    There is an optional flag '-w' that is used for the Football csv; it
    selects ``parser.parse_weighted_csv`` and only applies to the csv choice.
    Note: -w doesn't quite work at the moment. Please ignore it for now.

    After parsing, the iteration/node/edge counts are looked up in the
    mapping returned by ``specs()`` under the bare file name (directory part
    removed); a file that is not listed there raises ``KeyError``.

    For 'x' or 'b' the graph is streamed to ``./pr_phi`` as
    "<node> <in-neighbour>" lines and the program's output is passed to
    ``printPageRankValues``. For 'c' or 'b' the process described by
    ``cuda_command`` is started.

    Any menu answer other than '1' or '2' prints an error and returns.
    '''
    fileSpecs = specs()

    # '-w' is only recognised as the first command-line argument.
    is_weighted = len(sys.argv) > 1 and sys.argv[1] == '-w'

    # Menu (``raw_input`` is the Python 2 prompt function this script uses).
    print('CSC 466: Lab 3 - PageRank & Link Analysis')
    parse_menu = raw_input('Parse:\n' +
                           '1) csv\n' +
                           '2) snap\n')
    file_name = raw_input('File name: ')
    version = raw_input('Xeon Phi, Cuda or Both? (x/c/b): ')

    if parse_menu not in ('1', '2'):
        # Wrong input
        print('Invalid input - exiting')
        return

    # The algorithm is the same for every format, only the parser differs.
    # Each parser returns (nodes, out_degrees, in_degrees, names).
    if parse_menu == '1':
        if is_weighted:
            parse = parser.parse_weighted_csv
        else:
            parse = parser.parse_csv
    else:
        parse = parser.parse_snap

    print('Parsing/Creating Graph...')
    start = time.time()  # Tracking time
    (nodes, out_degrees, in_degrees, names) = parse(file_name)
    end = time.time()
    print('Parse/Graph Set-up Time: ' + str(end - start) + ' seconds')

    # fileSpecs is keyed by the bare file name. When there is no '/',
    # rfind() returns -1 and the slice below keeps the whole string, so no
    # separate check is needed.
    file_name = file_name[file_name.rfind('/') + 1:]
    (numIterations, numNodes, numEdges) = fileSpecs[file_name]
    print('%s %s %s' % (numIterations, numNodes, numEdges))
    # NOTE(review): looks like leftover debugging output for one
    # hard-coded node; kept so the program's output does not change.
    print(out_degrees.get(12028))

    # Call C Program
    if version == 'x' or version == 'b':
        p = subprocess.Popen(
            ['./pr_phi', str(numNodes), str(numEdges), str(numIterations)],
            stdout=subprocess.PIPE, stdin=subprocess.PIPE)

        # One "<node> <in-neighbour>" line per incoming edge.
        for node in nodes:
            sources = in_degrees.get(node)
            if sources is not None:
                for source in sources:
                    p.stdin.write('%d %d\n' % (int(node), int(source)))

        # communicate() closes stdin itself and waits for the process to
        # finish, so no explicit close is needed afterwards.
        output = p.communicate()[0]
        output = output[:-1]  # drop the last character of the output

        useNames = parse_menu == '1' or file_name == "wiki-Vote.txt"
        print(str(useNames))
        printPageRankValues(output, useNames, names)

    if version == 'c' or version == 'b':
        # NOTE(review): the CUDA process is only started here; nothing is
        # written to it and its output is never read. The original source
        # carried the edge-streaming loop and the pure-Python ``pagerank``
        # run as disabled code at this point.
        p = subprocess.Popen(cuda_command,
                             stdout=subprocess.PIPE, stdin=subprocess.PIPE)
def demo(opt):
    """Recognise the text of every image crop and write one CSV row per crop.

    The bounding boxes are read with ``parse_csv(opt.input_image,
    opt.boxescsv)``, the model described by ``opt`` is built and loaded from
    ``opt.saved_model``, and every image found under ``opt.image_folder`` is
    run through it in batches. For each image a row made of the ``x,y``
    pairs of its bounding box followed by the predicted text is written to
    ``<opt.output_folder>result.csv``. The file is then copied to
    ``/input/output`` and that location is zipped as ``per_word_visual.zip``.

    Args:
        opt: Options object. The function reads the attributes used below
            and sets ``opt.num_class`` (and ``opt.input_channel`` when
            ``opt.rgb`` is truthy).

    NOTE(review): boxes are matched to images by position, assuming
    ``parse_csv`` returns them in the order the (unshuffled) loader yields
    images -- confirm against ``parse_csv`` and ``RawDataset``.
    """
    inputimage = opt.input_image
    boxesscv = opt.boxescsv
    bboxes = parse_csv(inputimage, boxesscv)

    # model configuration
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH, imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data, batch_size=opt.batch_size,
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_demo, pin_memory=True)

    result_path = f'{opt.output_folder}result.csv'

    # predict
    model.eval()
    # The result file is opened once for the whole run: reopening it in 'w'
    # mode for every batch would keep only the rows of the last batch.
    with torch.no_grad(), open(result_path, 'w') as log:
        images_done = 0  # number of images written in earlier batches
        for image_tensors, _image_paths in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)

            # For max length prediction
            length_for_pred = torch.IntTensor(
                [opt.batch_max_length] * batch_size).to(device)
            text_for_pred = torch.LongTensor(
                batch_size, opt.batch_max_length + 1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred)

                # Select max probability (greedy decoding) then decode
                # index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, preds_size)
            else:
                preds = model(image, text_for_pred, is_train=False)

                # Select max probability (greedy decoding) then decode
                # index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            for img_index, pred in enumerate(preds_str):
                if 'Attn' in opt.Prediction:
                    # prune after "end of sentence" token ([s])
                    pred = pred[:pred.find('[s]')]

                # Offset by the images of earlier batches so every batch
                # reads its own boxes instead of restarting at box 0.
                for x, y in bboxes[images_done + img_index]:
                    log.write(f'{x},{y},')
                log.write(f'{pred}\n')

            images_done += batch_size

    # copy log to local output folder
    shutil.copy(result_path, '/input/output')
    shutil.make_archive('per_word_visual', 'zip', '/input/output')
def _stream_graph_to_binary(executable, nodes, in_degrees, out_degrees,
                            numNodes, numEdges, numIterations):
    """Feed the graph to an external PageRank program and return its output.

    The program is started as ``executable numNodes numEdges numIterations``.
    Its standard input receives, first, one line per incoming edge holding
    the source node, in node order; then one "<node> <in-count> <out-degree>"
    line per node. The captured standard output is returned with its last
    character removed.
    """
    p = subprocess.Popen(
        [executable, str(numNodes), str(numEdges), str(numIterations)],
        stdout=subprocess.PIPE,
        stdin=subprocess.PIPE,
    )

    for node in nodes:
        sources = in_degrees.get(node)
        if sources is not None:
            for source in sources:
                p.stdin.write("%d\n" % int(source))

    for node in nodes:
        sources = in_degrees.get(node)
        in_count = len(sources) if sources is not None else 0
        out_count = out_degrees.get(node)
        if out_count is None:
            out_count = 0
        p.stdin.write("%d %d %d\n" % (int(node), int(in_count), int(out_count)))

    # communicate() closes stdin itself and waits for the process to finish.
    output = p.communicate()[0]
    return output[:-1]


def main():
    """
    Command-line entry point of the Page Rank program.

    Ex:
        > python lab3.py <file> <phi|cuda|both> [-w]

    ``<file>`` is parsed as csv when its name ends in "csv" and as a SNAP
    file when it ends in "txt"; any other name is rejected. ``<phi|cuda|both>``
    selects the external program(s) to run: ``./pr_test`` for "phi",
    ``./pr_cuda`` for "cuda", both for "both". Any other value runs nothing.

    There is an optional flag '-w' that is used for the Football csv; it
    selects ``parser.parse_weighted_csv`` and only applies to csv files.
    Note: -w doesn't quite work at the moment. Please ignore it for now.

    After parsing, the iteration/node/edge counts are looked up in the
    mapping returned by ``specs()`` under the bare file name (directory part
    removed); a file that is not listed there raises ``KeyError``.
    """
    fileSpecs = specs()

    is_weighted = "-w" in sys.argv  # Used for '-w' flag

    print("CSC 466: Lab 3 - PageRank & Link Analysis")

    # Both positional arguments are required; report that instead of
    # failing with an IndexError.
    if len(sys.argv) < 3:
        print("Usage: %s <file> <phi|cuda|both> [-w]" % sys.argv[0])
        return

    file_name = sys.argv[1]
    version = sys.argv[2]

    # The algorithm is the same for every format, only the parser differs.
    # Each parser returns (nodes, out_degrees, in_degrees, names).
    extension = file_name[-3:]
    if extension == "csv":
        parse_menu = "1"
        if is_weighted:
            parse = parser.parse_weighted_csv
        else:
            parse = parser.parse_csv
    elif extension == "txt":
        parse_menu = "2"
        parse = parser.parse_snap
    else:
        # Stop here: an unsupported file must not be parsed as csv.
        print("Format not supported")
        print("Invalid input - exiting")
        return

    print("Parsing/Creating Graph...")
    start = time.time()  # Tracking time
    (nodes, out_degrees, in_degrees, names) = parse(file_name)
    end = time.time()
    print("Parse/Graph Set-up Time: " + str(end - start) + " seconds")

    # fileSpecs is keyed by the bare file name. When there is no "/",
    # rfind() returns -1 and the slice below keeps the whole string.
    file_name = file_name[file_name.rfind("/") + 1:]
    (numIterations, numNodes, numEdges) = fileSpecs[file_name]
    print("%s %s %s" % (numIterations, numNodes, numEdges))

    useNames = parse_menu == "1" or file_name == "wiki-Vote.txt"

    # Call C Program
    if version == "phi" or version == "both":
        output = _stream_graph_to_binary(
            "./pr_test", nodes, in_degrees, out_degrees,
            numNodes, numEdges, numIterations)
        printPageRankValues(output, useNames, names)

    if version == "cuda" or version == "both":
        output = _stream_graph_to_binary(
            "./pr_cuda", nodes, in_degrees, out_degrees,
            numNodes, numEdges, numIterations)
        print(str(useNames))
        printPageRankValues(output, useNames, names)