def print_debug_dict(fdebug_file):
    ''' Print debug dictionary to file '''

    global g_err_dbg, g_err_dct_p_unit, g_err_dct_p_file

    if g_err_dbg:
        # write to csv files
        fdebug_file_units = '.'.join(
            fdebug_file.split('.')[:-1]) + '.units.csv'
        with open(fdebug_file_units, 'w', encoding='utf-8') as write_file:
            write_file.write('unit_uri_or_suffix,tp,fp,fn\n')
            for atm_unt_full_uri, atm_unt_inst in g_err_dct_p_unit.items():
                atm_unt_uri_to_print = atm_unt_full_uri.split("#")[-1]
                write_file.write(f"{atm_unt_uri_to_print},{atm_unt_inst['tp']},"
                                 f"{atm_unt_inst['fp']},{atm_unt_inst['fn']}\n")
        fdebug_file_files = '.'.join(
            fdebug_file.split('.')[:-1]) + '.files.csv'
        with open(fdebug_file_files, 'w', encoding='utf-8') as write_file:
            write_file.write('filename,tp,fp,fn\n')
            for filename, file_inst in g_err_dct_p_file.items():
                write_file.write(f"{filename},{file_inst['tp']},"
                                 f"{file_inst['fp']},{file_inst['fn']}\n")
        fclrprint(
            f'Dumped debug results to files {fdebug_file_units}, {fdebug_file_files}',
            'c')
def main():

    global g_ignore_articles, g_list_of_ignored_articles
    g_ignore_articles = False

    ap = ArgumentParser(
        description=
        f'Validate files (xlsx, ccutvld.json) and output validation results.\n\tUSAGE: python {basename(__file__)} -d DIR_NAME'
    )
    ap.add_argument('-d', '--dir_name', help='directory path.', type=str)
    ap.add_argument(
        '-o',
        '--output_debug_file',
        help='If specified, print debug summary to output file (csv).',
        type=str)
    ap.add_argument(
        '-g',
        '--ignore_files_list',
        help=
        'If specified, ignore the list of file titles in given file (json).',
        type=str)
    args = ap.parse_args()

    if args.dir_name:
        fclrprint(f'Preparing to run over files in directory {args.dir_name}')
        # load list of filenames to ignore
        if args.ignore_files_list:
            g_ignore_articles = True
            with open(args.ignore_files_list, 'r') as infile:
                g_list_of_ignored_articles = load(infile)
        # test each .xlsx and .ccutvld.json
        ccut_test_xlsx_files_in_dir(args.dir_name, args.output_debug_file)
    else:
        fclrprint('Directory path was not provided.', 'r')
        exit(1)
Example #3
    def get_leaf_nodes(self):
        ''' Get leaf nodes in graph. '''

        list_of_leaf_nodes = list()
        for seg in self.sg:
            if len(seg.children) == 0:
                list_of_leaf_nodes.append(seg)
                fclrprint('leaf [%s]' % (str(seg)), 'p')
        return list_of_leaf_nodes
Example #4
    def reset_all_tables(self):
        ''' Reset all tables (geom, map, contain). '''

        sql_reset_all_tables = sqlstr_reset_all_tables(self.geom_table_name,
                                                       self.SRID)
        cur = self.connection.cursor()
        cur.execute(sql_reset_all_tables)
        self.pgcprint(cur.query.decode())
        # commit changes
        self.connection.commit()
        fclrprint('Reset tables finished', 'c')
Example #5
    def export_relations_jl_file(self, rel_outputfile):
        ''' Export relations list to json-lines file '''

        with open(rel_outputfile, 'w') as write_file:
            for seg in self.sg:
                for child_gid in seg.children.keys():
                    line_dict = OrderedDict()
                    line_dict['parent_gid'] = seg.gid
                    line_dict['child_gid'] = child_gid
                    write_file.write(dumps(line_dict) + '\n')
        fclrprint('Exported relations info to file %s' % (rel_outputfile), 'c')
def calc_and_print_stats(true_pos, false_pos, false_neg, color='c'):
    ''' Calculate and print validation statistics (Precision, Recall, F1, etc...). '''

    try:
        precision = (float(true_pos)) / (true_pos + false_pos)
        recall = (float(true_pos)) / (true_pos + false_neg)
        f1_score = (2.0 * precision * recall) / (precision + recall)
        fclrprint('f1=%.4f | precision=%.4f, recall=%.4f | tp=%d, fp=%d, fn=%d' % \
            (f1_score, precision, recall, true_pos, false_pos, false_neg), color)
    except ZeroDivisionError:
        fclrprint('Stats calculation failed: division by zero (no predictions or no ground truth).', 'r')
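# A quick check of the formulas above (a sketch, assuming fclrprint is
# importable): with tp=8, fp=2, fn=4 we get precision = 8/10 = 0.8,
# recall = 8/12 ~= 0.6667, and f1 = 2*0.8*0.6667 / (0.8+0.6667) ~= 0.7273.
calc_and_print_stats(8, 2, 4)
# prints: f1=0.7273 | precision=0.8000, recall=0.6667 | tp=8, fp=2, fn=4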
Example #7
    def export_geom_table_to_file(self, geometry_output_jl):
        ''' Export the geometry file to some json-lines file. '''

        export_sql = sqlstr_export_geom_table_to_file(self.geom_table_name,
                                                      geometry_output_jl)
        cur = self.connection.cursor()
        cur.execute(export_sql)
        self.pgcprint(cur.query.decode())
        # commit changes
        self.connection.commit()
        fclrprint('Exported geometry info to file %s' % (geometry_output_jl),
                  'c')
Example #8
    @classmethod
    def from_shapefile(cls, path, pg_channel_obj, name):
        ''' Create a segment class from shapefile. '''

        start_time = time()
        cur = pg_channel_obj.connection.cursor()

        working_segment_table_name = 'active_seg'
        sql_create_table = sqlstr_create_gid_geom_table(
            working_segment_table_name, pg_channel_obj.SRID)

        cur.execute(sql_create_table)
        pg_channel_obj.pgcprint(cur.query.decode())

        shapefile = ogr_open(path)
        layer = shapefile.GetLayer(0)

        sql_insert_geom_values_to_table = '''
        INSERT INTO %s (geom) VALUES (ST_MULTI(ST_GeometryFromText(%s, %s)))
        '''

        total_feature_count_in_map = layer.GetFeatureCount()
        for i in range(total_feature_count_in_map):
            feature = layer.GetFeature(i)
            wkt = feature.GetGeometryRef().ExportToWkt()
            cur.execute(
                sql_insert_geom_values_to_table,
                (AsIs(working_segment_table_name), wkt, pg_channel_obj.SRID))

        sql_insert_new_segment = sqlstr_insert_new_record_to_geom_table(
            pg_channel_obj.geom_table_name, working_segment_table_name)
        cur.execute(sql_insert_new_segment)
        pg_channel_obj.pgcprint(cur.query.decode())

        fetchall = cur.fetchall()
        if len(fetchall) != 1:
            raise ValueError(
                'Fetched zero or multiple entries (expected exactly 1): %s' %
                (fetchall))
        gid = fetchall[0][0]

        cur.execute('DROP TABLE %s' % (AsIs(working_segment_table_name)))
        pg_channel_obj.pgcprint(cur.query.decode())

        # commit changes
        pg_channel_obj.connection.commit()
        fclrprint(
            'Created %s from %s (%d geometry lines)' %
            (name, path, total_feature_count_in_map), 'c')

        seg = cls(pg_channel_obj, gid, name, time() - start_time)
        return seg
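# Usage mirrors the call in process_shapefiles further down; the config path,
# shapefile path, and segment name below are illustrative:
channel = PostGISChannel('config.json', verbosity=True)
seg_1900 = Segment.from_shapefile('maps/1900.shp', channel, '1900')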
Example #9
    def export_segments_jl_file(self, seg_outputfile):
        ''' Export segments list to json-lines file '''

        with open(seg_outputfile, 'w') as write_file:
            for seg in self.sg:
                line_dict = OrderedDict()
                line_dict['gid'] = seg.gid
                line_dict['name'] = seg.name
                line_dict['gen_time'] = seg.gen_time
                seg_yrs = list()
                # TODO: mapping from name to year should be read from an external file
                if '_' not in seg.name:
                    seg_yrs.append(seg.name[0:4])
                line_dict['years'] = seg_yrs

                write_file.write(dumps(line_dict) + '\n')
        fclrprint('Exported segments info to file %s' % (seg_outputfile), 'c')
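# Given the year heuristic above, a segment named '1900' (no underscore) would
# yield a line like the following (gid and gen_time are illustrative):
#
#   {"gid": 3, "name": "1900", "gen_time": 12.7, "years": ["1900"]}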
Example #10
    def __init__(self, config_path, verbosity, reset_tables=False):
        ''' Initialize PostGISChannel. '''

        # verbosity for easier debuggability
        self.verbosity = verbosity

        # load config file
        try:
            with open(config_path, 'r') as config_file:
                config = load(config_file)
        except Exception as e:
            print("Cannot load configuration file, ERROR: %s" % str(e))
            exit(-1)

        # load config parameters
        try:
            self.dbname = config["dbname"]
            self.user = config["user"]
            self.host = config["host"]
            self.geom_table_name = config["geometry_table_name"]
            geo_type = config[
                "geometry_type"]  # "MULTILINESTRING" / "MULTIPOLYGON"
            self.SRID = config["SRID"]
        except LookupError:
            print("Invalid configuration file")
            exit(-1)

        # establish connection
        try:
            self.connection = connect(dbname=self.dbname,
                                      user=self.user,
                                      host=self.host)
            fclrprint(
                'Connection established to %s [%s@%s]' %
                (self.dbname, self.user, self.host), 'g')
        except psycopg2_error as e:
            print("Unable to connect to the database: %s" % str(e))
            exit(-1)

        # set geometry type
        set_global_geom_type(geo_type)

        # reset tables if requested
        if reset_tables:
            self.reset_all_tables()
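# A minimal config sketch this constructor would accept; the keys match the
# lookups above, the values are hypothetical:
#
#   {
#     "dbname": "linkedmaps",
#     "user": "postgres",
#     "host": "localhost",
#     "geometry_table_name": "map_geoms",
#     "geometry_type": "MULTILINESTRING",
#     "SRID": 4326
#   }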
Example #11
def main():

    ap = ArgumentParser(
        description=
        'Process shapefiles (vector data) and generate (jl) files with line segmentation info.\n\tUSAGE: python %s -d DIR_NAME -c CONFIG_FILE'
        % (basename(__file__)))
    ap.add_argument('-d',
                    '--dir_name',
                    help='Directory path with shapefiles.',
                    type=str)
    ap.add_argument('-c',
                    '--config_file',
                    help='Input configuration file.',
                    type=str)
    ap.add_argument('-o',
                    '--output_file',
                    help='Output geometry file (jl).',
                    default='line_seg.jl',
                    type=str)
    ap.add_argument('-v',
                    '--debug_prints',
                    help='Print additional debug prints.',
                    default=False,
                    action='store_true')
    ap.add_argument('-r',
                    '--reset_db',
                    help='Reset Databases prior to processing.',
                    default=False,
                    action='store_true')
    args = ap.parse_args()

    if args.dir_name and args.config_file:
        fclrprint('Going to process shapefiles in dir %s using configurations from file %s...' \
                  % (args.dir_name, args.config_file))
        process_shapefiles(args.dir_name, args.config_file, args.output_file,
                           args.debug_prints, args.reset_db)
    else:
        fclrprint('Input directory and configuration file were not provided.',
                  'r')
        exit(1)
Example #12
def process_shapefiles(directory_path, configuration_file, outputfile,
                       verbosity_on, reset_database):
    ''' Process the shapefiles in a given directory and generate segmentation output files (jl),
    using the given configuration to interact with PostgreSQL and execute PostGIS actions. '''

    channel_inst = PostGISChannel(configuration_file, verbosity_on,
                                  reset_database)
    sgraph = SegmentsGraph(channel_inst)

    start_time = time()
    for fname in listdir(directory_path):
        if fname.endswith(".shp"):
            it_start_time = time()
            fname_no_ext = fname.split('.shp')[0]
            full_fname = directory_path + '/' + fname
            fclrprint('Processing %s' % (full_fname), 'c')
            try:
                seg = Segment.from_shapefile(full_fname, channel_inst,
                                             fname_no_ext)
                sgraph.add_segment_to_graph(seg)
            except Exception as e:
                fclrprint(
                    'Failed processing file %s\n%s' % (full_fname, str(e)),
                    'r')
                exit(-1)
            fclrprint(
                'Map addition took %s' %
                (str(timedelta(seconds=int(time() - it_start_time))).zfill(8)),
                'c')
            fclrprint(
                'Total running time %s' %
                (str(timedelta(seconds=int(time() - start_time))).zfill(8)),
                'c')

    print(sgraph)
    fclrprint('Segmentation finished!', 'g')
    sgraph.export_geom_jl_file(outputfile.replace('.jl', '.geom.jl'))
    sgraph.export_segments_jl_file(outputfile.replace('.jl', '.seg.jl'))
    sgraph.export_relations_jl_file(outputfile.replace('.jl', '.rel.jl'))
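# With the default output_file of 'line_seg.jl', the three exports above land in:
#   line_seg.geom.jl  (gid-to-WKT geometry mapping)
#   line_seg.seg.jl   (segment metadata: gid, name, gen_time, years)
#   line_seg.rel.jl   (parent/child relations)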
Example #13
def process_file(fname):
    ''' Processes a file looking for units, returns a structured output (dict) of the file. '''

    global g_tot_num_of_sheets

    # init file dictionaries
    f_dict = dict()
    raw_f_dict = dict()
    # Load spreadsheets
    xl = ExcelFile(fname)
    # iterate over sheets
    for sheet_name in xl.sheet_names:
        g_tot_num_of_sheets += 1
        # Load a sheet into a DataFrame by name
        df = xl.parse(sheet_name, header=None, skip_blank_lines=False)
        raw_f_dict[sheet_name] = df
        fclrprint(f'Processing Sheet {sheet_name}...')
        sht_dict = process_sheet(df)
        if sht_dict:
            f_dict[sheet_name] = sht_dict
    if f_dict:
        return f_dict, raw_f_dict
    return None, raw_f_dict
Example #14
def main():
    ap = ArgumentParser(description=f'Process a spreadsheet file (xlsx) and generate a dictionary file (json) of cell locations in which units were detected.\n\tUSAGE: python {basename(__file__)} -i INPUT_FILE')
    ap.add_argument('-i', '--input_file', help='input spreadsheet file (xlsx).', type=str)
    args = ap.parse_args()

    if args.input_file:
        init_globals()
        output_fname = '.'.join(args.input_file.split('.')[:-1]) + '.ccut.json'
        fclrprint(f'Processing file {args.input_file}')
        dict_out, _ = process_file(args.input_file)
        with open(output_fname, 'w') as outfile:
            dump(dict_out, outfile, indent=2)
        fclrprint(f'Done... generated file {output_fname}', 'g')
    else:
        fclrprint('An input file was not provided.', 'r')
        exit(1)
def ccut_test_xlsx_files_in_dir(input_dir_name, output_debug_file):
    ''' Process the xlsx files in a given directory and match against its given validation file. '''

    global g_err_dbg, g_err_dct_p_file, g_ignore_articles, g_list_of_ignored_articles

    init_globals()
    init_ccut_validation(output_debug_file)
    true_pos, false_pos, false_neg = 0, 0, 0

    tot_files = get_num_of_files_in_dir(input_dir_name, ".xlsx")
    files_processed = 0
    actual_files_processed = 0
    start = time()
    for xfname in listdir(input_dir_name):
        if xfname.endswith(".xlsx"):
            files_processed += 1
            xfname_no_suffix = xfname.split('.xlsx')[0]
            if g_ignore_articles:
                # check if article is in provided list
                if xfname_no_suffix in g_list_of_ignored_articles:
                    continue
            actual_files_processed += 1
            xfname_full = join(input_dir_name, xfname)
            vfname_full = join(input_dir_name,
                               xfname_no_suffix + '.ccutvld.json')
            if not exists(vfname_full):
                fclrprint(
                    f'File {xfname_full} does not have a results file. Skipping...',
                    'r')
                continue
            fclrprint(
                f'Processing file {xfname_full} and comparing results to {vfname_full}'
            )
            act_dict, _ = process_file(xfname_full)
            with open(vfname_full, 'r') as read_file:
                val_dict = load(read_file)
            f_tp, f_fp, f_fn = compare_actual_with_expected_dicts(
                act_dict, val_dict)
            # update debug dictionary
            if g_err_dbg:
                g_err_dct_p_file[xfname] = dict()
                g_err_dct_p_file[xfname]['tp'] = f_tp
                g_err_dct_p_file[xfname]['fp'] = f_fp
                g_err_dct_p_file[xfname]['fn'] = f_fn
            # add to total
            true_pos += f_tp
            false_pos += f_fp
            false_neg += f_fn
            if (files_processed % 10) == 0:
                eta = (tot_files - files_processed) * (time() -
                                                       start) / files_processed
                eta = str(timedelta(seconds=int(eta))).zfill(8)
                print('Completion: %04.2f%%, eta: %s' %
                      (100 * files_processed / tot_files, eta))
                calc_and_print_stats(true_pos, false_pos, false_neg)

    calc_and_print_stats(true_pos, false_pos, false_neg, color='g')
    print(
        f'Processed a total of {actual_files_processed} files ({get_tot_num_of_sheets()} sheets) out of {files_processed} .xlsx files in given directory {input_dir_name}!'
    )
    print_debug_dict(output_debug_file)
Example #16
    def pgcprint(self, pstr):
        ''' Debug printing method. '''

        if self.verbosity:
            fclrprint(pstr, 'b')
Example #17
                    res = results["results"]["bindings"][i][k]['value']
                    re.append(res)
                    if k == 'wkt':
                        linestring_data.append(res)
                ret.append(re)
        except Exception:
            keys = ['No results']
        return render_template('index.html',
                               classdropdown=SPARQL_EXAMPLES.keys(),
                               selectedclass='',
                               raw_sparql=_sparql,
                               data=linestring_data,
                               key=keys,
                               result=ret)


if __name__ == '__main__':

    parser = ArgumentParser()
    parser.add_argument('-s', help='SPARQL endpoint URL.')
    args = parser.parse_args()
    app.config['sparql_endpoint'] = "http://localhost:3030/linkedmaps/query"
    if args.s:
        app.config['sparql_endpoint'] = args.s
    else:
        fclrprint('---No SPARQL endpoint was provided, using the default', 'r')
    fclrprint(f'---Your SPARQL endpoint: {app.config["sparql_endpoint"]}', 'g')
    g_sparql = SPARQLWrapper(app.config['sparql_endpoint'])
    g_sparql.setReturnFormat(JSON)
    app.run(host="localhost", port=5000, debug=True)
def main():

    ap = ArgumentParser(
        description=
        'Process line segmentation output files (jl) and generate a (ttl) file containing triples.\n\tUSAGE: python %s -g GEOMETRY_FILE -s SEGMENTS_FILE -r RELATIONS_FILE -l OSM_URIS_FILE'
        % (basename(__file__)))
    ap.add_argument('-g',
                    '--geometry_file',
                    help='File (jl) holding the geometry info (wkt).',
                    type=str)
    ap.add_argument('-s',
                    '--segments_file',
                    help='File (jl) holding segments info (metadata).',
                    type=str)
    ap.add_argument(
        '-r',
        '--relations_file',
        help='File (jl) holding relations info (parents, children).',
        type=str)
    ap.add_argument('-l',
                    '--osm_uris_file',
                    help='File (jl) holding OpenStreetMap info.',
                    type=str)
    ap.add_argument('-o',
                    '--output_file',
                    help='The output file (ttl) with the generated triples.',
                    default='linked_maps.maps.ttl',
                    type=str)

    args = ap.parse_args()

    if args.geometry_file and args.relations_file and args.segments_file:
        fclrprint(
            'Going to process files %s, %s, %s...' %
            (args.geometry_file, args.segments_file, args.relations_file))

        # initialize graph with gid-to-wkt mapping file
        lm_graph = LinkedMapGraph(args.geometry_file)

        # load segments info
        with open(args.segments_file) as read_file:
            for line_r in read_file:
                seg_dict = loads(line_r)
                lm_graph.add_geo_feature_node(seg_dict['gid'],
                                              seg_dict['name'],
                                              seg_dict['years'])

        # load relations info
        with open(args.relations_file) as read_file:
            for line_r in read_file:
                rel_dict = loads(line_r)
                lm_graph.add_geo_child_to_parent(rel_dict['parent_gid'],
                                                 rel_dict['child_gid'])

        # load OpenStreetMap info
        if args.osm_uris_file:
            with open(args.osm_uris_file) as read_file:
                for line_r in read_file:
                    osm_dict = loads(line_r)
                    lm_graph.add_openstreetmap_uris_to_gid(
                        osm_dict['gid'], osm_dict['osm_uris'])

        # materialize triples
        lm_graph.dt.serialize(args.output_file, format="turtle")
        fclrprint('Done, generated ttl file %s!' % (args.output_file), 'g')
    else:
        fclrprint('Geometry, segments and relations files were not provided.',
                  'r')
        exit(1)
Example #19
    def add_segment_to_graph(self, segment):
        ''' Add segment to the graph. '''

        leaves = self.get_leaf_nodes()

        # append new segment to graph
        self.sg.append(segment)

        list_of_leaf_gids = list()
        for leaf_seg in leaves:
            # intersect
            int_seg = leaf_seg.intersect(
                segment,
                str('i_' + hash_string_md5('i_%s_%s' %
                                           (leaf_seg.name, segment.name))))
            if int_seg:
                fclrprint(
                    '[%d] = [%d] AND [%d]' %
                    (int_seg.gid, leaf_seg.gid, segment.gid), 'p')
                self.sg.append(int_seg)
                list_of_leaf_gids.append(int_seg.gid)
                # leaf minus intersection (if intersection is not empty)
                leaf_min_int = leaf_seg.minus(
                    int_seg,
                    str('m_' + hash_string_md5('m_%s_%s' %
                                               (leaf_seg.name, int_seg.name))))
                if leaf_min_int:
                    fclrprint(
                        '[%d] = [%d] \\ [%d]' %
                        (leaf_min_int.gid, leaf_seg.gid, int_seg.gid), 'p')
                    self.sg.append(leaf_min_int)
                else:
                    fclrprint(
                        '{} = [%d] \\ [%d]' % (leaf_seg.gid, int_seg.gid),
                        'gray')
            else:
                fclrprint('{} = [%d] AND [%d]' % (leaf_seg.gid, segment.gid),
                          'gray')

        if list_of_leaf_gids:
            # segment minus union-of-intersections
            segment_min_union_ints = segment.minus_union_of_segments(
                list_of_leaf_gids,
                str('mu_' + hash_string_md5('mu_%s_UL' % (segment.name))))
            if segment_min_union_ints:
                fclrprint(
                    '[%d] = [%d] \\ UNION%s' %
                    (segment_min_union_ints.gid, segment.gid,
                     str(list_of_leaf_gids)), 'p')
                self.sg.append(segment_min_union_ints)
            else:
                fclrprint(
                    '{} = [%d] \\ UNION%s' %
                    (segment.gid, str(list_of_leaf_gids)), 'gray')

        # commit changes
        self.pgchannel.connection.commit()
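# To illustrate the set algebra above, suppose the graph has one leaf A and a
# new segment B is added. The loop and final step would append (when non-empty):
#   A AND B                  (intersection; its gid is collected)
#   A \ (A AND B)            (leaf minus intersection)
#   B \ UNION([A AND B])     (new segment minus union of intersections)
# leaving the graph's leaves as a disjoint decomposition of A and B.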