def print_debug_dict(fdebug_file):
    ''' Print debug dictionary to file. '''
    global g_err_dbg, g_err_dct_p_unit, g_err_dct_p_file
    if g_err_dbg:
        # write to csv files
        fdebug_file_units = '.'.join(fdebug_file.split('.')[:-1]) + '.units.csv'
        with open(fdebug_file_units, 'w', encoding='utf-8') as write_file:
            write_file.write('unit_uri_or_suffix,tp,fp,fn\n')
            for atm_unt_full_uri, atm_unt_inst in g_err_dct_p_unit.items():
                atm_unt_uri_to_print = atm_unt_full_uri.split("#")[-1]
                write_file.write(atm_unt_uri_to_print + "," + str(atm_unt_inst['tp']) + ","
                                 + str(atm_unt_inst['fp']) + "," + str(atm_unt_inst['fn']) + "\n")
        fdebug_file_files = '.'.join(fdebug_file.split('.')[:-1]) + '.files.csv'
        with open(fdebug_file_files, 'w', encoding='utf-8') as write_file:
            write_file.write('filename,tp,fp,fn\n')
            for filename, file_inst in g_err_dct_p_file.items():
                write_file.write(filename + "," + str(file_inst['tp']) + ","
                                 + str(file_inst['fp']) + "," + str(file_inst['fn']) + "\n")
        fclrprint(f'Dumped debug results to files {fdebug_file_units}, {fdebug_file_files}', 'c')
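# Hedged illustration of the debug-dictionary shape consumed by print_debug_dict();
# the real entries are populated during validation, and the unit URI and filename
# below are made up:
#
#   g_err_dct_p_unit = {'http://example.org/units#Meter': {'tp': 12, 'fp': 3, 'fn': 1}}
#   g_err_dct_p_file = {'article_0001.xlsx': {'tp': 5, 'fp': 0, 'fn': 2}}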
def main():
    global g_ignore_articles, g_list_of_ignored_articles
    g_ignore_articles = False
    ap = ArgumentParser(
        description=f'Validate files (xlsx, ccutvld.json) and output validation results.'
                    f'\n\tUSAGE: python {basename(__file__)} -d DIR_NAME')
    ap.add_argument('-d', '--dir_name', help='directory path.', type=str)
    ap.add_argument('-o', '--output_debug_file',
                    help='If specified, print debug summary to output file (csv).', type=str)
    ap.add_argument('-g', '--ignore_files_list',
                    help='If specified, ignore the list of file titles in given file (json).',
                    type=str)
    args = ap.parse_args()
    if args.dir_name:
        fclrprint(f'Preparing to run over files in directory {args.dir_name}')
        # load list of filenames to ignore
        if args.ignore_files_list:
            g_ignore_articles = True
            with open(args.ignore_files_list, 'r') as infile:
                g_list_of_ignored_articles = load(infile)
        # test each .xlsx and .ccutvld.json
        ccut_test_xlsx_files_in_dir(args.dir_name, args.output_debug_file)
    else:
        fclrprint('Directory path was not provided.', 'r')
        exit(1)
def get_leaf_nodes(self):
    ''' Get leaf nodes in graph. '''
    list_of_leaf_nodes = list()
    for seg in self.sg:
        if len(seg.children) == 0:
            list_of_leaf_nodes.append(seg)
            fclrprint('leaf [%s]' % (str(seg)), 'p')
    return list_of_leaf_nodes
def reset_all_tables(self):
    ''' Reset all tables (geom, map, contain). '''
    sql_reset_all_tables = sqlstr_reset_all_tables(self.geom_table_name, self.SRID)
    cur = self.connection.cursor()
    cur.execute(sql_reset_all_tables)
    self.pgcprint(cur.query.decode())
    # commit changes
    self.connection.commit()
    fclrprint('Reset tables finished', 'c')
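# Hedged sketch of the SQL-string helper called by reset_all_tables(); the real
# sqlstr_reset_all_tables() is defined elsewhere in the repo, and the table layout
# here is an assumption inferred from how gid/geom are used in this file:
def sqlstr_reset_all_tables_sketch(geom_table_name, srid):
    ''' Illustrative only: drop and recreate the main geometry table. '''
    return '''
        DROP TABLE IF EXISTS %s CASCADE;
        CREATE TABLE %s (
            gid SERIAL PRIMARY KEY,
            geom GEOMETRY(GEOMETRY, %d)
        );
    ''' % (geom_table_name, geom_table_name, srid)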
def export_relations_jl_file(self, rel_outputfile):
    ''' Export relations list to json-lines file. '''
    with open(rel_outputfile, 'w') as write_file:
        for seg in self.sg:
            for child_gid in seg.children.keys():
                line_dict = OrderedDict()
                line_dict['parent_gid'] = seg.gid
                line_dict['child_gid'] = child_gid
                write_file.write(dumps(line_dict) + '\n')
    fclrprint('Exported relations info to file %s' % (rel_outputfile), 'c')
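# Example of the json-lines output written by export_relations_jl_file()
# (the gids are made up):
#   {"parent_gid": 17, "child_gid": 23}
#   {"parent_gid": 17, "child_gid": 24}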
def calc_and_print_stats(true_pos, false_pos, false_neg, color='c'):
    ''' Calculate and print validation statistics (precision, recall, F1, etc...). '''
    try:
        precision = float(true_pos) / (true_pos + false_pos)
        recall = float(true_pos) / (true_pos + false_neg)
        f1_score = (2.0 * precision * recall) / (precision + recall)
        fclrprint('f1=%.4f | precision=%.4f, recall=%.4f | tp=%d, fp=%d, fn=%d' %
                  (f1_score, precision, recall, true_pos, false_pos, false_neg), color)
    except ZeroDivisionError:
        fclrprint('Cannot compute statistics: the given counts lead to a zero denominator.', 'r')
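# Worked usage example with made-up counts: tp=8, fp=2, fn=4 gives
# precision = 8 / (8 + 2) = 0.8, recall = 8 / (8 + 4) ~= 0.6667 and
# f1 = 2 * 0.8 * 0.6667 / (0.8 + 0.6667) ~= 0.7273, so
#   calc_and_print_stats(8, 2, 4)
# prints: f1=0.7273 | precision=0.8000, recall=0.6667 | tp=8, fp=2, fn=4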
def export_geom_table_to_file(self, geometry_output_jl):
    ''' Export the geometry table to a json-lines file. '''
    export_sql = sqlstr_export_geom_table_to_file(self.geom_table_name, geometry_output_jl)
    cur = self.connection.cursor()
    cur.execute(export_sql)
    self.pgcprint(cur.query.decode())
    # commit changes
    self.connection.commit()
    fclrprint('Exported geometry info to file %s' % (geometry_output_jl), 'c')
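# Hedged sketch of the export helper used above; the real
# sqlstr_export_geom_table_to_file() lives elsewhere in the repo, and the
# server-side COPY with row_to_json() below is only one plausible way to get
# one JSON object per line:
def sqlstr_export_geom_table_to_file_sketch(geom_table_name, outputfile):
    ''' Illustrative only: dump gid/wkt rows as json-lines. '''
    return '''
        COPY (
            SELECT row_to_json(r)
            FROM (SELECT gid, ST_AsText(geom) AS wkt FROM %s) r
        ) TO '%s';
    ''' % (geom_table_name, outputfile)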
@classmethod
def from_shapefile(cls, path, pg_channel_obj, name):
    ''' Create a segment class from shapefile. '''
    start_time = time()
    cur = pg_channel_obj.connection.cursor()
    working_segment_table_name = 'active_seg'
    sql_create_table = sqlstr_create_gid_geom_table(
        working_segment_table_name, pg_channel_obj.SRID)
    cur.execute(sql_create_table)
    pg_channel_obj.pgcprint(cur.query.decode())
    shapefile = ogr_open(path)
    layer = shapefile.GetLayer(0)
    sql_insert_geom_values_to_table = '''
        INSERT INTO %s (geom)
        VALUES (ST_MULTI(ST_GeometryFromText(%s, %s)))
    '''
    total_feature_count_in_map = layer.GetFeatureCount()
    for i in range(total_feature_count_in_map):
        feature = layer.GetFeature(i)
        wkt = feature.GetGeometryRef().ExportToWkt()
        cur.execute(sql_insert_geom_values_to_table,
                    (AsIs(working_segment_table_name), wkt, pg_channel_obj.SRID))
    sql_insert_new_segment = sqlstr_insert_new_record_to_geom_table(
        pg_channel_obj.geom_table_name, working_segment_table_name)
    cur.execute(sql_insert_new_segment)
    pg_channel_obj.pgcprint(cur.query.decode())
    fetchall = cur.fetchall()
    if len(fetchall) != 1:
        raise ValueError('Fetched %d entries instead of exactly 1: %s'
                         % (len(fetchall), fetchall))
    gid = fetchall[0][0]
    cur.execute('DROP TABLE %s' % (AsIs(working_segment_table_name)))
    pg_channel_obj.pgcprint(cur.query.decode())
    # commit changes
    pg_channel_obj.connection.commit()
    fclrprint('Created %s from %s (%d geometry lines)' %
              (name, path, total_feature_count_in_map), 'c')
    seg = cls(pg_channel_obj, gid, name, time() - start_time)
    return seg
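# Hedged sketch of sqlstr_insert_new_record_to_geom_table(), which is defined
# elsewhere in the repo; from_shapefile() expects it to return exactly one row,
# so a RETURNING clause on the inserted gid is assumed:
def sqlstr_insert_new_record_to_geom_table_sketch(geom_table_name, working_table_name):
    ''' Illustrative only: collapse the working table into one multi-geometry
        record of the main geometry table and return its new gid. '''
    return '''
        INSERT INTO %s (geom)
        SELECT ST_Multi(ST_Union(geom)) FROM %s
        RETURNING gid;
    ''' % (geom_table_name, working_table_name)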
def export_segments_jl_file(self, seg_outputfile):
    ''' Export segments list to json-lines file. '''
    with open(seg_outputfile, 'w') as write_file:
        for seg in self.sg:
            line_dict = OrderedDict()
            line_dict['gid'] = seg.gid
            line_dict['name'] = seg.name
            line_dict['gen_time'] = seg.gen_time
            seg_yrs = list()
            # TODO: mapping from name to year should be read from an external file
            if '_' not in seg.name:
                seg_yrs.append(seg.name[0:4])
            line_dict['years'] = seg_yrs
            write_file.write(dumps(line_dict) + '\n')
    fclrprint('Exported segments info to file %s' % (seg_outputfile), 'c')
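# Example of the json-lines output written by export_segments_jl_file() (values
# are made up); per the TODO above, a year is currently taken from the first four
# characters of any name that contains no underscore:
#   {"gid": 17, "name": "1962bray", "gen_time": 12.3, "years": ["1962"]}
#   {"gid": 23, "name": "i_0a1b2c", "gen_time": 0.8, "years": []}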
def __init__(self, config_path, verbosity, reset_tables=False):
    ''' Initialize PostGISChannel. '''
    # verbosity for easier debugability
    self.verbosity = verbosity
    # load config file
    try:
        with open(config_path, 'r') as config_file:
            config = load(config_file)
    except Exception as e:
        print('Cannot load configuration file, ERROR: %s' % str(e))
        exit(-1)
    # load config parameters
    try:
        self.dbname = config["dbname"]
        self.user = config["user"]
        self.host = config["host"]
        self.geom_table_name = config["geometry_table_name"]
        geo_type = config["geometry_type"]  # "MULTILINESTRING" / "MULTIPOLYGON"
        self.SRID = config["SRID"]
    except LookupError:
        print("Invalid configuration file")
        exit(-1)
    # establish connection
    try:
        self.connection = connect(dbname=self.dbname, user=self.user, host=self.host)
        fclrprint('Connection established to %s [%s@%s]' %
                  (self.dbname, self.user, self.host), 'g')
    except psycopg2_error as e:
        print("Unable to connect to the database: %s" % str(e))
        exit(-1)
    # set geometry type
    set_global_geom_type(geo_type)
    # reset tables if requested
    if reset_tables:
        self.reset_all_tables()
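# Hedged example of the configuration file consumed by __init__(); the keys mirror
# the lookups above, the values are placeholders:
#   {
#     "dbname": "linkedmaps",
#     "user": "postgres",
#     "host": "localhost",
#     "geometry_table_name": "geom_table",
#     "geometry_type": "MULTILINESTRING",
#     "SRID": 4326
#   }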
def main():
    ap = ArgumentParser(
        description='Process shapefiles (vector data) and generate (jl) files with line '
                    'segmentation info.\n\tUSAGE: python %s -d DIR_NAME -c CONFIG_FILE'
                    % (basename(__file__)))
    ap.add_argument('-d', '--dir_name', help='Directory path with shapefiles.', type=str)
    ap.add_argument('-c', '--config_file', help='Input configuration file.', type=str)
    ap.add_argument('-o', '--output_file', help='Output geometry file (jl).',
                    default='line_seg.jl', type=str)
    ap.add_argument('-v', '--debug_prints', help='Print additional debug prints.',
                    default=False, action='store_true')
    ap.add_argument('-r', '--reset_db', help='Reset databases prior to processing.',
                    default=False, action='store_true')
    args = ap.parse_args()
    if args.dir_name and args.config_file:
        fclrprint('Going to process shapefiles in dir %s using configurations from file %s...'
                  % (args.dir_name, args.config_file))
        process_shapefiles(args.dir_name, args.config_file, args.output_file,
                           args.debug_prints, args.reset_db)
    else:
        fclrprint('Input directory and configuration file were not provided.', 'r')
        exit(1)
def process_shapefiles(directory_path, configuration_file, outputfile, verbosity_on, reset_database):
    ''' Generate csv tables from the shapefiles in a given directory; use the given
        configurations to interact with PostgreSQL and execute PostGIS actions. '''
    channel_inst = PostGISChannel(configuration_file, verbosity_on, reset_database)
    sgraph = SegmentsGraph(channel_inst)
    start_time = time()
    for fname in listdir(directory_path):
        if fname.endswith(".shp"):
            it_start_time = time()
            fname_no_ext = fname.split('.shp')[0]
            full_fname = directory_path + '/' + fname
            fclrprint('Processing %s' % (full_fname), 'c')
            try:
                seg = Segment.from_shapefile(full_fname, channel_inst, fname_no_ext)
                sgraph.add_segment_to_graph(seg)
            except Exception as e:
                fclrprint('Failed processing file %s\n%s' % (full_fname, str(e)), 'r')
                exit(-1)
            fclrprint('Map addition took %s' %
                      (str(timedelta(seconds=int(time() - it_start_time))).zfill(8)), 'c')
    fclrprint('Total running time %s' %
              (str(timedelta(seconds=int(time() - start_time))).zfill(8)), 'c')
    print(sgraph)
    fclrprint('Segmentation finished!', 'g')
    sgraph.export_geom_jl_file(outputfile.replace('.jl', '.geom.jl'))
    sgraph.export_segments_jl_file(outputfile.replace('.jl', '.seg.jl'))
    sgraph.export_relations_jl_file(outputfile.replace('.jl', '.rel.jl'))
def process_file(fname):
    ''' Processes a file looking for units, returns a structured output (dict) of the file. '''
    global g_tot_num_of_sheets
    # init file dictionaries
    f_dict = dict()
    raw_f_dict = dict()
    # load spreadsheet
    xl = ExcelFile(fname)
    # iterate over sheets
    for sheet_name in xl.sheet_names:
        g_tot_num_of_sheets += 1
        # load a sheet into a DataFrame by name
        df = xl.parse(sheet_name, header=None, skip_blank_lines=False)
        raw_f_dict[sheet_name] = df
        fclrprint(f'Processing Sheet {sheet_name}...')
        sht_dict = process_sheet(df)
        if sht_dict:
            f_dict[sheet_name] = sht_dict
    if f_dict:
        return f_dict, raw_f_dict
    return None, raw_f_dict
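# Hedged illustration of the structure returned by process_file(); the exact
# per-sheet layout depends on process_sheet(), which is defined elsewhere, so the
# cell key and unit URI below are made up:
#   f_dict = {'Sheet1': {'B3': ['http://example.org/units#Meter']}}
# raw_f_dict maps each sheet name to its raw pandas DataFrame.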
def main():
    ap = ArgumentParser(
        description=f'Process a spreadsheet file (xlsx) and generate a dictionary file (json) '
                    f'of cell locations in which units were detected.'
                    f'\n\tUSAGE: python {basename(__file__)} -i INPUT_FILE')
    ap.add_argument('-i', '--input_file', help='input spreadsheet file (xlsx).', type=str)
    args = ap.parse_args()
    if args.input_file:
        init_globals()
        output_fname = '.'.join(args.input_file.split('.')[:-1]) + '.ccut.json'
        fclrprint(f'Processing file {args.input_file}')
        dict_out, _ = process_file(args.input_file)
        with open(output_fname, 'w') as outfile:
            dump(dict_out, outfile, indent=2)
        fclrprint(f'Done... generated file {output_fname}', 'g')
    else:
        fclrprint('An input file was not provided.', 'r')
        exit(1)
def ccut_test_xlsx_files_in_dir(input_dir_name, output_debug_file):
    ''' Process the xlsx files in a given directory and match each against its validation file. '''
    global g_err_dbg, g_err_dct_p_file, g_ignore_articles, g_list_of_ignored_articles
    init_globals()
    init_ccut_validation(output_debug_file)
    true_pos, false_pos, false_neg = 0, 0, 0
    tot_files = get_num_of_files_in_dir(input_dir_name, ".xlsx")
    files_processed = 0
    actual_files_processed = 0
    start = time()
    for xfname in listdir(input_dir_name):
        if xfname.endswith(".xlsx"):
            files_processed += 1
            xfname_no_suffix = xfname.split('.xlsx')[0]
            if g_ignore_articles:
                # check if article is in provided list
                if xfname_no_suffix in g_list_of_ignored_articles:
                    continue
            actual_files_processed += 1
            xfname_full = join(input_dir_name, xfname)
            vfname_full = join(input_dir_name, xfname_no_suffix + '.ccutvld.json')
            if not exists(vfname_full):
                fclrprint(f'File {xfname_full} does not have a results file. Skipping...', 'r')
                continue
            fclrprint(f'Processing file {xfname_full} and comparing results to {vfname_full}')
            act_dict, _ = process_file(xfname_full)
            with open(vfname_full, 'r') as read_file:
                val_dict = load(read_file)
            f_tp, f_fp, f_fn = compare_actual_with_expected_dicts(act_dict, val_dict)
            # update debug dictionary
            if g_err_dbg:
                g_err_dct_p_file[xfname] = dict()
                g_err_dct_p_file[xfname]['tp'] = f_tp
                g_err_dct_p_file[xfname]['fp'] = f_fp
                g_err_dct_p_file[xfname]['fn'] = f_fn
            # add to total
            true_pos += f_tp
            false_pos += f_fp
            false_neg += f_fn
            if (files_processed % 10) == 0:
                eta = (tot_files - files_processed) * (time() - start) / files_processed
                eta = str(timedelta(seconds=int(eta))).zfill(8)
                print('Completion: %04.2f%%, eta: %s' % (100 * files_processed / tot_files, eta))
    calc_and_print_stats(true_pos, false_pos, false_neg, color='g')
    print(f'Processed a total of {actual_files_processed} files ({get_tot_num_of_sheets()} sheets) '
          f'out of {files_processed} .xlsx files in given directory {input_dir_name}!')
    print_debug_dict(output_debug_file)
def pgcprint(self, pstr):
    ''' Debug printing method. '''
    if self.verbosity:
        fclrprint(pstr, 'b')
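# Hedged sketch of fclrprint(), which is imported from elsewhere in the repo;
# the single-letter-to-ANSI color mapping below is a guess based on the codes
# used throughout these files:
def fclrprint_sketch(pstr, color='w'):
    ''' Illustrative only: print a string in the requested terminal color. '''
    ansi = {'r': '\033[91m', 'g': '\033[92m', 'b': '\033[94m',
            'c': '\033[96m', 'p': '\033[95m', 'gray': '\033[90m', 'w': ''}
    print('%s%s\033[0m' % (ansi.get(color, ''), pstr))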
                res = results["results"]["bindings"][i][k]['value']
                re.append(res)
                if k == 'wkt':
                    linestring_data.append(res)
            ret.append(re)
    except Exception:
        keys = ['No results']
    return render_template('index.html',
                           classdropdown=SPARQL_EXAMPLES.keys(),
                           selectedclass='',
                           raw_sparql=_sparql,
                           data=linestring_data,
                           key=keys,
                           result=ret)


if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('-s', help='SPARQL endpoint URL.')
    args = parser.parse_args()
    app.config['sparql_endpoint'] = "http://localhost:3030/linkedmaps/query"
    if args.s:
        app.config['sparql_endpoint'] = args.s
    else:
        fclrprint('---You did not set a SPARQL endpoint, using default', 'r')
    fclrprint(f'---Your SPARQL endpoint: {app.config["sparql_endpoint"]}', 'g')
    g_sparql = SPARQLWrapper(app.config['sparql_endpoint'])
    g_sparql.setReturnFormat(JSON)
    app.run(host="localhost", port=5000, debug=True)
def main():
    ap = ArgumentParser(
        description='Process line segmentation output files (jl) and generate a (ttl) file '
                    'containing triples.\n\tUSAGE: python %s -g GEOMETRY_FILE -s SEGMENTS_FILE '
                    '-r RELATIONS_FILE -l OSM_URIS_FILE' % (basename(__file__)))
    ap.add_argument('-g', '--geometry_file',
                    help='File (jl) holding the geometry info (wkt).', type=str)
    ap.add_argument('-s', '--segments_file',
                    help='File (jl) holding segments info (metadata).', type=str)
    ap.add_argument('-r', '--relations_file',
                    help='File (jl) holding relations info (parents, children).', type=str)
    ap.add_argument('-l', '--osm_uris_file',
                    help='File (jl) holding OpenStreetMap info.', type=str)
    ap.add_argument('-o', '--output_file',
                    help='The output file (ttl) with the generated triples.',
                    default='linked_maps.maps.ttl', type=str)
    args = ap.parse_args()
    if args.geometry_file and args.relations_file and args.segments_file:
        fclrprint('Going to process files %s, %s, %s...' %
                  (args.geometry_file, args.segments_file, args.relations_file))
        # initialize graph with gid-to-wkt mapping file
        lm_graph = LinkedMapGraph(args.geometry_file)
        # load segments info
        with open(args.segments_file) as read_file:
            for line_r in read_file:
                seg_dict = loads(line_r)
                lm_graph.add_geo_feature_node(seg_dict['gid'], seg_dict['name'],
                                              seg_dict['years'])
        # load relations info
        with open(args.relations_file) as read_file:
            for line_r in read_file:
                rel_dict = loads(line_r)
                lm_graph.add_geo_child_to_parent(rel_dict['parent_gid'],
                                                 rel_dict['child_gid'])
        # load OpenStreetMap info
        if args.osm_uris_file:
            with open(args.osm_uris_file) as read_file:
                for line_r in read_file:
                    osm_dict = loads(line_r)
                    lm_graph.add_openstreetmap_uris_to_gid(osm_dict['gid'],
                                                           osm_dict['osm_uris'])
        # materialize triples
        lm_graph.dt.serialize(args.output_file, format="turtle")
        fclrprint('Done, generated ttl file %s!' % (args.output_file), 'g')
    else:
        fclrprint('Geometry, segments and relations files were not provided.', 'r')
        exit(1)
def add_segment_to_graph(self, segment):
    ''' Add segment to the graph. '''
    leaves = self.get_leaf_nodes()
    # append new segment to graph
    self.sg.append(segment)
    list_of_leaf_gids = list()
    for leaf_seg in leaves:
        # intersect
        int_seg = leaf_seg.intersect(
            segment, 'i_' + hash_string_md5('i_%s_%s' % (leaf_seg.name, segment.name)))
        if int_seg:
            fclrprint('[%d] = [%d] AND [%d]' % (int_seg.gid, leaf_seg.gid, segment.gid), 'p')
            self.sg.append(int_seg)
            list_of_leaf_gids.append(int_seg.gid)
            # leaf minus intersection (if intersection is not empty)
            leaf_min_int = leaf_seg.minus(
                int_seg, 'm_' + hash_string_md5('m_%s_%s' % (leaf_seg.name, int_seg.name)))
            if leaf_min_int:
                fclrprint('[%d] = [%d] \\ [%d]' %
                          (leaf_min_int.gid, leaf_seg.gid, int_seg.gid), 'p')
                self.sg.append(leaf_min_int)
            else:
                fclrprint('{} = [%d] \\ [%d]' % (leaf_seg.gid, int_seg.gid), 'gray')
        else:
            fclrprint('{} = [%d] AND [%d]' % (leaf_seg.gid, segment.gid), 'gray')
    if list_of_leaf_gids:
        # segment minus union-of-intersections
        segment_min_union_ints = segment.minus_union_of_segments(
            list_of_leaf_gids, 'mu_' + hash_string_md5('mu_%s_UL' % (segment.name)))
        if segment_min_union_ints:
            fclrprint('[%d] = [%d] \\ UNION%s' %
                      (segment_min_union_ints.gid, segment.gid, str(list_of_leaf_gids)), 'p')
            self.sg.append(segment_min_union_ints)
        else:
            fclrprint('{} = [%d] \\ UNION%s' % (segment.gid, str(list_of_leaf_gids)), 'gray')
    # commit changes
    self.pgchannel.connection.commit()
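# Hedged sketch of the hashing helper used above for derived-segment names; the
# real hash_string_md5() is defined elsewhere in the repo, but an md5 hex digest
# of the input string is the natural reading:
from hashlib import md5

def hash_string_md5_sketch(in_str):
    ''' Illustrative only: return the hex md5 digest of a string. '''
    return md5(in_str.encode('utf-8')).hexdigest()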