Example #1
def get_pokestops(request):
    # return HttpResponse("")
    args = {"csrftoken": "KrC5dILDzSfyGYTdOuk0P7ZNanrVRNsv",
            "SACSID": """~AJKiYcG_GzhmrIH-wHeFC1QE2U540HyYy8n7WK1uwi_6jTgLzPS5UFWrfvxe1XiSfbSEoN1L6jY8BWxX7zdPM_1FVyv4jwbwqfd1FS-HcL8IhD769flzqNpWX5EmbdnWFje81MNPOW2NYRCygRd8PHBXjKMVT_O0cM5ZXy8Jg5Tm75WYJ8tKEBeIRq8UeeriC-avEbgUlRnIqw868OUY2K6NTu6V15Lz-5GSlGwGWxkPzwaXfQr6iA1oLU-tjitfp5RsRFNZM3Ai84dH-tpgxcMAZEFM5co1N69T-Mr2-jxCgzBfD9x_lH4PUty-oyjAR90Z7sS8CzSo"""}

    coordinates = get_coords()
    total = len(coordinates)
    count = 0
    for c in coordinates:
        count += 1
        args['latitude'] = c[0]
        args['longitude'] = c[1]

        pokestop = Pokestop(args)
        pokestops = pokestop.entities()
        for p in pokestops:
            q = Poke()
            q.guid = p.get('guid')
            q.latitude = p.get('latitude')
            q.longitude = p.get('longitude')
            q.image = p.get('image')
            q.name = p.get('name')
            q.save()
        print_progress(count, total)
        time.sleep(1)

    return HttpResponse('success')
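
The snippets on this page call print_progress with slightly different signatures: positional (count, total), keyword arguments, an optional percentages flag, or project-specific variants. A minimal sketch of a compatible helper, assuming a simple carriage-return progress line (the implementation below is illustrative and not taken from any of the source projects):

import sys

def print_progress(count, total, percentages=True):
    # Render a single-line progress indicator on stdout.
    if percentages and total:
        msg = "\rProgress: {:.1f}%".format(100.0 * count / total)
    else:
        msg = "\rProgress: {}/{}".format(count, total)
    sys.stdout.write(msg)
    sys.stdout.flush()
    if count >= total:
        sys.stdout.write("\n")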
Example #2
def save_pokestops():
    settings.configure()
    args = {
        "csrftoken":
        "3DX2hXU05R1u9SJNJetgemM3GYHE9V3I",
        "SACSID":
        """~AJKiYcG3G0qMy0FrvJ1ZetpfHv3FhKc_gb1bw909JXFmrzhIZR6xMW1dlxB4xyG0_rX_ZKCa2Zsrv9N0Mx_c5_uRp0TzRodIH8IxwjO49nJr07XBPypIrwUCmphMvdJhzVPdVePsGIEWRETQTjP2qingSLN0l3acas4Fw7HFDFFJSBGVOymIfEOQmbwvKbB0tLMn4ZHIPGPqNBeRHaoMstBuiyRjUsmZp5YSWcdo6sOUiCWehR2Ab0nvFKE6S_uxjvYCAV_avud7_F-kKUbNEiudZAaPwtKFUEMFDhdev5Gmyw7NnR7ektCZfmigZ3TVsc5szPVFDD3CuGwI_r2y_PZ8jB67DuOfww"""
    }
    # db_test_server = MySQLdb.connect(user="******", passwd="India123", db='hifitestdb',
    #                                  host='hifidbinstance.cvc882wdgyyf.us-east-1.rds.amazonaws.com')
    db_test_server = MySQLdb.connect(user="******", passwd="'", db="hifi")
    cur = db_test_server.cursor()

    coordinates = get_coords()
    total = len(coordinates)
    count = 0
    for c in coordinates:
        count += 1
        args['latitude'] = c[0]
        args['longitude'] = c[1]
        pokestop = Pokestop(args)
        pokestops = pokestop.entities()
        for p in pokestops:
            cur.execute(
                """INSERT INTO `pokemongo_pokestop`(`distance`, `name`, `bearing`, `latitude`, `longitude`,
                `image`, `guid`,`compass`) VALUES('{distance}','{name}','{bearing}','{latitude}','{longitude}','{image}'
                ,'{guid}', '{compass}')""",
                format({
                    'distance': str(p.get('distance')),
                    'name': str(p.get('name')),
                    'bearing': str(p.get('bearing')),
                    'latitude': str(p.get('latitude')),
                    'longitude': str(p.get('longitude')),
                    'image': str(p.get('image')),
                    'guid': str(p.get('guid')),
                    'compass': str(p.get('compass'))
                }))

        print_progress(count, total)
        time.sleep(2)
    db_test_server.commit()

    return True
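
The INSERT above builds SQL by formatting values into the statement, which breaks on values containing quotes and is open to SQL injection. A safer sketch using MySQLdb's parameter binding, assuming the same cursor and pokestop dict shape as in Example #2 (illustrative only):

INSERT_SQL = (
    "INSERT INTO `pokemongo_pokestop` "
    "(`distance`, `name`, `bearing`, `latitude`, `longitude`, `image`, `guid`, `compass`) "
    "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
)

def insert_pokestop(cur, p):
    # Let the MySQLdb driver escape the values instead of formatting them into the SQL string.
    cur.execute(INSERT_SQL, (
        p.get('distance'), p.get('name'), p.get('bearing'),
        p.get('latitude'), p.get('longitude'),
        p.get('image'), p.get('guid'), p.get('compass'),
    ))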
Example #3
    def dataset_to_tfrecords(self, data, output_path):

        # Number of images. Used when printing the progress.
        num_images = len(data['image'])

        # TODO: aSk: Handle case when images do not fit batch
        batch_size = self.config.tfr_images_per_record
        iters = int(num_images / batch_size)

        for iter_no in range(iters):
            idx_start = iter_no * batch_size
            idx_end = idx_start + batch_size

            output_path_mod = os.path.join(output_path,
                                           'record_' + str(iter_no) + '.tfr')
            self.create_tfrecord(data, idx_start, idx_end, output_path_mod,
                                 iter_no)

            # Print the percentage-progress.
            utils.print_progress(count=idx_start, total=num_images)
Example #4
def add_missing_edge_geometries(graph, edge_dicts):
    edge_count = len(edge_dicts)
    for idx, edge_d in enumerate(edge_dicts):
        if ('geometry' not in edge_d):
            node_from = edge_d['uvkey'][0]
            node_to = edge_d['uvkey'][1]
            # interpolate missing geometry as a straight line between nodes
            edge_geom = get_edge_geom_from_node_pair(graph, node_from, node_to)
            # set geometry attribute of the edge
            nx.set_edge_attributes(graph,
                                   {edge_d['uvkey']: {
                                        'geometry': edge_geom
                                    }})
        # set length attribute
        nx.set_edge_attributes(
            graph,
            {edge_d['uvkey']: {
                 'length': round(edge_d['geometry'].length, 3)
             }})
        utils.print_progress(idx + 1, edge_count, percentages=True)
    print('\nEdge geometries & lengths set.')
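
Example #4 depends on get_edge_geom_from_node_pair to interpolate a missing edge geometry as a straight line between the two end nodes. A possible sketch of that helper, assuming the graph stores node coordinates in 'x'/'y' attributes as OSMnx graphs do (the body below is an assumption; only the name comes from the snippet):

from shapely.geometry import LineString, Point

def get_edge_geom_from_node_pair(graph, node_from, node_to):
    # Build a straight LineString between the coordinates of the two nodes.
    from_point = Point(graph.nodes[node_from]['x'], graph.nodes[node_from]['y'])
    to_point = Point(graph.nodes[node_to]['x'], graph.nodes[node_to]['y'])
    return LineString([from_point, to_point])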
Example #5
    def dataset_to_tfrecords(self, image_paths, labels, output_path):

        num_images = len(image_paths)
        batch_size = self.config.tfr_images_per_record
        iters = int(num_images / batch_size)
        print('\nnum_images: {}'.format(num_images))
        print('batch_size: {}'.format(batch_size))

        idx_start = 0
        idx_end = 0
        for iter_no in range(iters):
            idx_start = iter_no * batch_size
            idx_end = idx_start + batch_size
            print('\nidx:[{}-{}]'.format(idx_start, idx_end))

            output_path_mod = os.path.join(output_path,
                                           'record_' + str(iter_no) + '.tfr')
            self.create_tfrecord(image_paths, labels, idx_start, idx_end,
                                 output_path_mod, iter_no)

            # Print the percentage-progress.
            utils.print_progress(count=idx_start, total=num_images)

        # For images < batch_size and
        # For images which do not fit the last batch
        idx_start = iters * batch_size
        idx_end = idx_start + (num_images % batch_size)
        print('\nidx:[{}-{}]'.format(idx_start, idx_end))
        if (num_images % batch_size):
            output_path_mod = os.path.join(output_path,
                                           'record_' + str(iters) + '.tfr')
            self.create_tfrecord(image_paths, labels, idx_start, idx_end,
                                 output_path_mod)

            # Print the percentage-progress.
            # utils.print_progress(count=idx_start, total=num_images)

        print('\n')
Example #6
    def populate():
        i = 0
        total_items = len(products['products'])
        print_progress(i, total_items)
        for product in products['products']:
            osigu_product = product['osigu']
            osigu_product['full_name'] += ' - QA TESTING'
            elasticSearchClient.post(index='product',
                                     doc_id=osigu_product['id'],
                                     body=osigu_product)
            provider_products = providerHandler.create_product(osigu_product)
            insurer_products = insurerHandler.create_product(osigu_product)

            if 'associate_provider' not in product:
                osiguHandler.associate_provider_product(
                    osigu_product['id'], provider_products)

            if 'associate_insurer' not in product:
                osiguHandler.associate_insurer_product(osigu_product['id'],
                                                       insurer_products)

            if 'diagnoses' in product:
                osiguHandler.create_product_diagnosis_association(
                    osigu_product['id'], product['diagnoses'])
                if 'insurer_product_diagnoses' not in product:
                    insurerHandler.create_insurer_product_diagnoses(
                        insurer_products, product['diagnoses'])

            if 'pbm' in product:
                pbmHandler.build_product_categories_statement(product)
                if 'substitute_osigu_product_id' in product['pbm']:
                    pbmHandler.build_substitute_products_statement(product)

            i += 1
            print_progress(i, total_items)

        pbmHandler.populate()
Example #7
def test_distr_centers_with_DT(districts_gdf):
    datetime = times.get_next_weekday_datetime(8, 30, skipdays=7)
    test_latLon = {'lat': 60.23122, 'lon': 24.83998}

    distr_valids = {}
    districts_gdf = districts_gdf.copy()
    for idx, distr in districts_gdf.iterrows():
        utils.print_progress(idx, len(districts_gdf), percentages=False)
        try:
            itins = DT_routing.get_route_itineraries(test_latLon,
                                                     distr['distr_latLon'],
                                                     '1.6666',
                                                     datetime,
                                                     itins_count=3,
                                                     max_walk_distance=6000)
        except Exception:
            itins = []
        valid = 'yes' if (len(itins) > 0) else 'no'
        distr_valids[distr['id_distr']] = valid

    districts_gdf['DT_valid'] = [
        distr_valids[id_distr] for id_distr in districts_gdf['id_distr']
    ]
    return districts_gdf
Example #8
print('Unprocessed:', len(axyinds))
#%% get axyinds to reprocess
axyinds = commutes_utils.get_axyinds_to_reprocess(grid, reprocessed)
axyinds = [axyind for axyind in axyinds if axyind not in reprocessed]
print('Processed:', len(reprocessed), 'axyinds')
print('Unprocessed:', len(axyinds), 'axyinds')
#%% take subset of axyinds to process
#%%
axyinds = axyinds[:10]
print('Start processing:', len(axyinds), 'axyinds')

#%% one by one
start_time = time.time()
all_home_walks_dfs = []
for idx, axyind in enumerate(axyinds):
    utils.print_progress(idx, len(axyinds), False)
    print('\nStart processing:', axyind)
    all_home_walks_dfs.append(get_home_walk_gdf(axyind))
    reprocessed.append(axyind)
# print time stats
time_elapsed = round(time.time() - start_time)
avg_origin_time = round(time_elapsed / len(axyinds))
print('--- %s min --- %s' % (round(
    time_elapsed / 60, 1), 'processed: ' + str(len(axyinds)) + ' origins'))
print('Average origin processing time:', avg_origin_time, 's')

#%% export to GDF for debugging
# all_home_walks_df = pd.concat(all_home_walks_dfs, ignore_index=True)
# all_home_walks_gdf = gpd.GeoDataFrame(all_home_walks_df, geometry='DT_geom', crs=from_epsg(4326))
# all_home_walks_gdf.drop(columns=['DT_dest_Point']).to_file('outputs/YKR_commutes_output/test.gpkg', layer='dt_paths', driver='GPKG')
# all_home_walks_gdf = gpd.GeoDataFrame(all_home_walks_df, geometry='DT_dest_Point', crs=from_epsg(4326))
                        driver="GPKG")
print('exported', filt_edges_filename, 'to data/networks.gpkg')
# get edge gdf from graph
edge_gdf = nw.get_edge_gdf(graph,
                           by_nodes=True,
                           attrs=['geometry', 'length', 'osmid'])
# add osmid as string to edge gdfs
edge_gdf['osmid_str'] = [
    nw.osmid_to_string(osmid) for osmid in edge_gdf['osmid']
]

#%% 3.3 Find matching (unwalkable) edges from the graph
edges_to_rm = []
edges_to_rm_gdfs = []
for idx, filt_edge in filt_edge_gdf.iterrows():
    utils.print_progress(idx, len(filt_edge_gdf), percentages=True)
    edges_found = edge_gdf.loc[edge_gdf['osmid_str'].str.contains(
        filt_edge['osmid_str'])].copy()
    if (len(edges_found) > 0):
        edges_found['filter_match'] = [
            geom_utils.lines_overlap(filt_edge['geometry'],
                                     geom,
                                     min_intersect=0.5)
            for geom in edges_found['geometry']
        ]
        edges_match = edges_found.loc[edges_found['filter_match'] ==
                                      True].copy()
        edges_to_rm_gdfs.append(edges_match)
        rm_edges = list(edges_match['uvkey'])
        edges_to_rm += rm_edges
all_edges_to_rm_gdf = gpd.GeoDataFrame(pd.concat(edges_to_rm_gdfs,
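
The edge-matching loop above keeps a graph edge only when geom_utils.lines_overlap reports enough overlap between the filter geometry and the edge geometry. A plausible sketch of such a predicate with shapely, assuming min_intersect is the required share of the shorter line covered by the other and that the small buffer distance is given in the data's CRS units (the implementation is an assumption; only the call signature comes from the snippet):

def lines_overlap(geom1, geom2, min_intersect=0.5, buffer_dist=0.00005):
    # Buffer one line slightly and measure how much of the other line falls inside the buffer.
    intersection = geom2.intersection(geom1.buffer(buffer_dist))
    shorter = min(geom1.length, geom2.length)
    if shorter == 0:
        return False
    return (intersection.length / shorter) >= min_intersect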
Example #10
    def preprocess(self, properties, datasets, logger, kind=PreprocessKind.train.value):
        """
            Checks if the input and the rating file exist and loads them from the output folder. Otherwise, takes the
            ratings, movies and tags datasets, converts them to dataframes and also loads the glove file. It iterates
            the ratings dataframe keeping from every row the movie id, user id and the rating. It uses the functions
            preprocess_rating, preprocess_text and text_to_glove to create a vector corresponding to a movie's features
            and user id. The user's id is added at the first position of that vector. Every vector is appended to a list of
            vectors called input_data. Finally, the rating of every user for a particular movie is added to a list
            called ratings, and both this list and the input_data list are saved to the output folder.

            Args:
                properties(dict): properties loaded from yaml file. Used so as to get the output folder
                datasets (dict): contains the dataframes of all the movielens csvs
                logger (Logger): the logger to print messages
                kind (str): if set to train the ratings.csv is used for input vectors otherwise the generated
                test_recommendation.csv is used
        """
        output_folder = properties["output_folder"]
        input_data_pickle_filename = self.input_data_pickle + "_{}_{}".format(properties["dataset"],
                                                                              properties["classification"])
        ratings_pickle_filename = self.ratings_pickle + "_{}_{}".format(properties["dataset"],
                                                                        properties["classification"])
        test_dataset_pickle_filename = self.test_dataset_pickle + "_{}_{}".format(properties["dataset"],
                                                                                  properties["classification"])

        if self.check_pickle_files_exist(properties=properties):
            logger.info("Content-based input data already exist and will be loaded from pickle file")
            input_filename = input_data_pickle_filename if kind == PreprocessKind.train.value else \
                test_dataset_pickle_filename
            self.input_data = utils.load_from_pickle(output_folder, input_filename)
            self.ratings = utils.load_from_pickle(output_folder, ratings_pickle_filename)
            logger.info("Loaded inputs of shape {}".format(self.input_data.shape))
            logger.info("Loaded ratings of shape {}".format(self.ratings.shape))
        else:
            ratings_df = datasets["ratings"] if kind == PreprocessKind.train.value else datasets["test_recommendation"]
            movies_df = datasets["movies"]
            tags_df = datasets["tags"]
            glove_df = utils.load_glove_file(properties=properties, logger=logger)
            users_dict_dummy = self.__create_dummy_variables(ratings=ratings_df)
            logger.info("Generating input vectors")
            self.input_data = []
            self.ratings = []
            for index, row in ratings_df.iterrows():
                user_id, movie_id, rating, _ = row
                movie_id = int(movie_id)
                user_id = int(user_id)
                logger.debug("Preprocessing userid {} and movieid {} with rating {}".format(user_id, movie_id, rating))
                # preprocess
                rating = self._preprocess_rating(properties, rating)
                logger.debug("Preprocessed rating: {}".format(rating))
                movie_text = self._preprocess_text(movies_df, tags_df, movie_id, user_id, logger)
                logger.debug("Preprocessed text: {}".format(" ".join(movie_text)))
                movie_vector = self._text_to_glove(properties, glove_df, movie_text)
                if movie_vector.size == 0:
                    continue
                movie_vector = np.concatenate((users_dict_dummy[user_id], movie_vector), axis=1)
                self.input_data.append(movie_vector)
                self.ratings.append(rating)
                utils.print_progress(self.ratings, logger=logger)

            self.ratings = np.asarray(self.ratings)
            self.input_data = np.concatenate(self.input_data)
            logger.info("Produced a feature matrix of shape {}".format(self.input_data.shape))
            # standardization
            logger.info("Standardize input vectors")
            self.input_data = preprocessing.scale(self.input_data)
            logger.info("Save input vectors to file")
            input_filename = input_data_pickle_filename if kind == PreprocessKind.train.value else \
                test_dataset_pickle_filename
            utils.write_to_pickle(obj=self.input_data, directory=output_folder, filename=input_filename)
            utils.write_to_pickle(obj=self.ratings, directory=output_folder, filename=ratings_pickle_filename)
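
The content-based preprocess method above converts a movie's text to a single GloVe-based vector via _text_to_glove. A minimal sketch of what that step could look like, assuming glove_df is indexed by word with the embedding dimensions as columns and that out-of-vocabulary words are skipped (this is a guess at the helper, not the project's actual code):

import numpy as np

def text_to_glove(glove_df, words):
    vectors = []
    for word in words:
        if word in glove_df.index:
            # Each row of glove_df is assumed to hold one word's embedding.
            vectors.append(glove_df.loc[word].values.astype(np.float64))
    if not vectors:
        # Matches the movie_vector.size == 0 check in preprocess().
        return np.array([])
    # Average the word vectors into a single (1, dim) row so it can be concatenated
    # with the user dummy variables along axis=1.
    return np.mean(vectors, axis=0).reshape(1, -1)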
Example #11
    def preprocess(self,
                   properties,
                   datasets,
                   logger,
                   kind=PreprocessKind.train.value):
        """
        Initially, checks if the ratings list exists in the output folder and if this is the case it loads it.
        Otherwise, it takes from the ratings dataset the ratings of the users, the name of the movies from the movies
        dataset and creates a list with the movie ids. Then, within a for loop, it iterates the ratings dataframe and for
        each user keeps track of the rating they gave to every movie. If a user didn't rate a movie, the algorithm puts a
        zero at the corresponding position of the vector. After finishing this process for every user, it returns the
        vectors of the users as a list called user_ratings and writes it to the output folder as a pickle file.

        Args:
            properties (dict): dictionary with the loaded properties from the yaml file
            datasets (dict): the datasets' dictionary which was created from the read_csv function

        """
        output_folder = properties["output_folder"]
        users_ratings_pickle_filename = self.users_ratings_pickle + "_{}".format(
            properties["dataset"])
        users_ids_pickle_filename = self.users_ids_pickle + "_{}".format(
            properties["dataset"])
        movie_ids_pickle_filename = self.movie_ids_pickle + "_{}".format(
            properties["dataset"])
        test_dataset_pickle_filename = self.test_dataset_pickle + "_{}".format(
            properties["dataset"])

        if utils.check_file_exists(output_folder,
                                   users_ratings_pickle_filename):
            logger.info(
                "Collaborative input vectors already exist and will be loaded from pickle file"
            )
            input_filename = users_ratings_pickle_filename if kind == PreprocessKind.train.value else \
                test_dataset_pickle_filename
            self.users_ratings = utils.load_from_pickle(
                output_folder, input_filename)
            self.user_ids = utils.load_from_pickle(output_folder,
                                                   users_ids_pickle_filename)
            self.movie_ids = utils.load_from_pickle(output_folder,
                                                    movie_ids_pickle_filename)
            logger.info("Loaded user ratings of shape {}".format(
                self.users_ratings.shape))
        else:
            os.makedirs(output_folder, exist_ok=True)
            ratings_df = datasets[
                "ratings"] if kind == PreprocessKind.train.value else datasets[
                    "test_recommendation"]
            movies_df = datasets["movies"]
            self.users_ratings = []
            self.user_ids = []
            self.movie_ids = movies_df["movieId"].values.tolist()
            logger.info("Generating input vectors")
            for _, row in ratings_df.iterrows():
                user_id = row["userId"]
                if user_id not in self.user_ids:
                    self.user_ids.append(user_id)
                    user_ratings = ratings_df[ratings_df["userId"] == user_id]
                    user_vector = []
                    for movie_id in self.movie_ids:
                        rating_row = user_ratings[user_ratings["movieId"] ==
                                                  movie_id]
                        if not rating_row.empty:
                            rating_row = rating_row["rating"].values.tolist()
                            user_vector.append(rating_row[0])
                        else:
                            user_vector.append(0.0)
                    user_vector = np.array(user_vector)
                    self.users_ratings.append(user_vector)
                utils.print_progress(self.users_ratings, logger=logger)
            logger.info("Writing input vectors into pickle file")
            self.users_ratings = np.array(self.users_ratings)
            self.user_ids = np.asarray(self.user_ids)
            self.movie_ids = np.asarray(self.movie_ids)
            input_filename = users_ratings_pickle_filename if kind == PreprocessKind.train.value else \
                test_dataset_pickle_filename
            utils.write_to_pickle(self.users_ratings, output_folder,
                                  input_filename)
            utils.write_to_pickle(self.user_ids, output_folder,
                                  users_ids_pickle_filename)
            utils.write_to_pickle(self.movie_ids, output_folder,
                                  movie_ids_pickle_filename)
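
The nested loops in the collaborative preprocess method above scan the ratings dataframe once per user and once per movie, which gets slow on larger MovieLens dumps. The same user-by-movie matrix can be built with a single pandas pivot; a sketch under the assumption that ratings_df has userId/movieId/rating columns as in the standard MovieLens CSVs:

import numpy as np

def build_user_ratings_matrix(ratings_df, movie_ids):
    # Rows: users, columns: movies, values: rating (0.0 where the user did not rate the movie).
    pivot = ratings_df.pivot_table(index="userId", columns="movieId",
                                   values="rating", fill_value=0.0)
    # Make sure every known movie id gets a column, in the same order as movie_ids.
    pivot = pivot.reindex(columns=movie_ids, fill_value=0.0)
    user_ids = pivot.index.to_numpy()
    users_ratings = pivot.to_numpy(dtype=np.float64)
    return user_ids, users_ratings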
Example #12
def get_home_work_walks(axyind=None,
                        work_rows=None,
                        districts=None,
                        datetime=None,
                        walk_speed=None,
                        subset=True,
                        logging=True,
                        graph=None,
                        edge_gdf=None,
                        node_gdf=None):
    stats_path = 'outputs/YKR_commutes_output/home_workplaces_stats/'
    geom_home = work_rows['geom_home'].iloc[0]
    home_latLon = work_rows['home_latLon'].iloc[0]
    # adjust origin if necessary to work with DT routing requests
    valid_home_latLon = get_valid_latLon_for_DT(home_latLon,
                                                distance=45,
                                                datetime=datetime,
                                                graph=graph,
                                                edge_gdf=edge_gdf,
                                                node_gdf=node_gdf)
    if (valid_home_latLon == None):
        return None
    destinations = get_work_destinations_gdf(geom_home,
                                             districts,
                                             axyind=axyind,
                                             work_rows=work_rows,
                                             logging=logging)
    if (destinations == None):
        return None
    work_destinations = destinations['destinations']
    home_work_stats = destinations['home_work_stats']
    # filter rows of work_destinations for testing
    work_destinations = work_destinations[:14] if subset == True else work_destinations
    # print('work_destinations', work_destinations)
    # filter out destination if it's the same as origin
    work_destinations = work_destinations[work_destinations.apply(
        lambda x: str(x['id_destination']) != str(axyind), axis=1)]
    total_origin_workers_flow = work_destinations['yht'].sum()
    if (logging == True):
        print('Routing to', len(work_destinations.index), 'destinations:')
    # get routes to all workplaces of the route
    home_walks_all = []
    for idx, destination in work_destinations.iterrows():
        utils.print_progress(idx,
                             destinations['total_dests_count'],
                             percentages=False)
        # execute routing request to Digitransit API
        try:
            itins = DT_routing.get_route_itineraries(valid_home_latLon,
                                                     destination['to_latLon'],
                                                     walk_speed,
                                                     datetime,
                                                     itins_count=3,
                                                     max_walk_distance=2500)
        except Exception:
            print('Error in DT routing request between:', axyind, 'and',
                  destination['id_destination'])
            itins = []
        # if no itineraries were found, try adjusting the origin & destination by snapping them to the network
        if (len(itins) == 0):
            print('no itineraries got -> try adjusting destination')
            adj_destination = get_valid_latLon_for_DT(destination['to_latLon'],
                                                      datetime=datetime,
                                                      graph=graph,
                                                      edge_gdf=edge_gdf,
                                                      node_gdf=node_gdf)
            time.sleep(0.3)
            try:
                itins = DT_routing.get_route_itineraries(
                    valid_home_latLon,
                    adj_destination,
                    walk_speed,
                    datetime,
                    itins_count=3,
                    max_walk_distance=2500)
                print('found', len(itins),
                      'with adjusted origin & destination locations')
            except Exception:
                print('error in DT routing with adjusted origin & destination')
                itins = []

        od_itins_count = len(itins)
        od_workers_flow = destination['yht']
        if (od_itins_count > 0):
            # calculate utilization of the itineraries for identifying the probability of using the itinerary from the origin
            # based on number of commuters and number of alternative itineraries to the destination
            # if only one itinerary is got for origin-destination (commute flow), utilization equals the number of commutes between the OD pair
            utilization = round(od_workers_flow / od_itins_count, 6)
            od_walk_dicts = DT_routing.parse_itin_attributes(
                itins,
                axyind,
                destination['id_destination'],
                utilization=utilization)
            home_walks_all += od_walk_dicts
        else:
            print('No DT itineraries got between:', axyind, 'and',
                  destination['id_destination'])
            error_df = pd.DataFrame([{
                'axyind':
                axyind,
                'destination_type':
                destination['destination_type'],
                'destination_id':
                destination['id_destination'],
                'destination_yht':
                destination['yht']
            }])
            error_df.to_csv(
                'outputs/YKR_commutes_output/home_stops_errors/axyind_' +
                str(axyind) + '_to_' + str(destination['id_destination']) +
                '.csv')

    # print(home_walks_all)
    # collect walks to stops/destinations to GDF
    if (len(home_walks_all) == 0):
        return None
    home_walks_all_df = pd.DataFrame(home_walks_all)
    home_walks_all_df['uniq_id'] = home_walks_all_df.apply(
        lambda row: DT_utils.get_walk_uniq_id(row), axis=1)
    # group similar walks and calculate relative utilization rates for them
    home_walks_g = DT_utils.group_home_walks(home_walks_all_df)
    # check that no commute data was lost in the analysis (flows match)
    total_utilization_sum = round(home_walks_g['utilization'].sum())
    total_probs = round(home_walks_g['prob'].sum())
    works_misings_routing = total_origin_workers_flow - total_utilization_sum
    if (works_misings_routing != 0 or total_probs != 100):
        print(
            'Error: utilization sum of walks does not match the total flow of commuters'
        )
        error_df = pd.DataFrame([{
            'axyind': axyind,
            'total_origin_workers_flow': total_origin_workers_flow,
            'total_utilization_sum': total_utilization_sum,
            'total_probs': total_probs
        }])
        error_df.to_csv(
            'outputs/YKR_commutes_output/home_stops_errors/axyind_' +
            str(axyind) + '_no_flow_match.csv')
    home_work_stats['works_misings_routing'] = works_misings_routing
    home_work_stats['works_misings_routing_rat'] = round(
        (works_misings_routing / total_origin_workers_flow) * 100, 1)
    home_work_stats['total_probs'] = total_probs
    home_work_stats.to_csv(stats_path + 'axyind_' + str(axyind) + '.csv')
    return home_walks_g
Example #13
    def create_tfrecord(self, image_paths, labels, idx_start, idx_end,
                        output_path):

        # Open a TFRecordWriter for the output-file.
        with tf.python_io.TFRecordWriter(output_path) as writer:

            for i in range(idx_start, idx_end):

                utils.print_progress(count=(i - idx_start), total=(idx_end - idx_start))

                image_path = image_paths[i]
                label = labels[i]

                # TODO: Do center cropping
                # img = cv2.imread(image_paths[i])
                # img = cv2.resize(img, (224, 224))

                # Load images
                img = Image.open(image_path)

                # TODO:
                # Center crop and resize image. size: The requested size in pixels, as a 2-tuple: (width, height)
                img = ImageOps.fit(img, (self.config.tfr_image_width,
                                         self.config.tfr_image_height),
                                   Image.LANCZOS, 0, (0.5, 0.5))
                # img = img.resize(size=(self.config.tfr_image_width, self.config.tfr_image_height))

                img = np.array(img)

                if output_path is not None:
                    img_path_name = os.path.join(os.path.dirname(output_path),
                                                 os.path.basename(image_path))
                    utils_image.save_image(img, img_path_name)

                ## Color constancy
                # img = utils_image.color_constancy(img, power=6, gamma=None)
                # if output_path is not None:
                #     img_path_name = os.path.join(os.path.dirname(output_path), os.path.basename(image_path))
                #     img_path_name = img_path_name.split('.')[0] + '_ilu.' + img_path_name.split('.')[1]

                #     # utils_image.save_image(img, img_path_name)
                #     img_save = Image.fromarray(img.astype('uint8'))
                #     img_save.save(img_path_name)

                # Convert the image to raw bytes.
                img_bytes = img.tostring()

                data = {
                    'image': self.wrap_bytes(img_bytes),
                    'label': self.wrap_int64(label)
                }

                # Wrap the data as TensorFlow Features.
                feature = tf.train.Features(feature=data)

                # Wrap again as a TensorFlow Example.
                example = tf.train.Example(features=feature)

                # Serialize the data.
                serialized = example.SerializeToString()

                # Write the serialized data to the TFRecords file.
                writer.write(serialized)
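
create_tfrecord above stores each image as raw bytes plus an int64 label. Reading the records back requires a parse function with a matching feature spec; a sketch in the same TF 1.x style as the writer, assuming images were written as uint8 RGB with the configured width and height:

import tensorflow as tf

def parse_tfrecord(serialized, image_width, image_height):
    features = {
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(serialized=serialized, features=features)
    # Decode the raw bytes written with img.tostring() back into a uint8 image tensor.
    image = tf.decode_raw(parsed['image'], tf.uint8)
    image = tf.reshape(image, [image_height, image_width, 3])
    return image, parsed['label']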
Example #14
                raise Exception('task type % s not allowed' % task_type)
    output_file.close()


if __name__ == '__main__':
    args = arg_parser.parse_args()
    config_file = args.config
    if not os.path.exists(config_file):
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                                config_file)
    config_parser.read(config_file)
    sessions = config_parser.sections()
    if 'STATUS' in sessions:
        is_train = config_parser['STATUS'].get('status', 'train') == 'train'
        if is_train:
            print_progress('Start Training')
        else:
            print_progress('Start Decoding')
    else:
        is_train = True

    if 'IO' in sessions:
        print_progress("Start config IO")
        IO_session = config_parser['IO']
        for key, val in IO_session.items():
            print(key, '=', val)
        file_type = IO_session.get('type', None)
        if file_type:
            if file_type == 'csv':
                if is_train:
                    # training
Example #15
    def train(self,
              model,
              train_data,
              train_size,
              num_steps,
              num_epochs,
              min_loss=0.3):
        # Set a graph-level seed so that the random sequences produced by all ops are reproducible across sessions.
        tf.set_random_seed(1234)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            total_loss = []
            timings = []
            steps_per_epoch = int(train_size / self.batch_size)
            num_epoch = 1

            for step in range(1, num_steps):
                beg_t = timeit.default_timer()
                #X, L = train_data.next()
                X, L = train_data, [1, 2, 3, 5]
                seq_len = np.max(L)

                print('X:', X)
                print('L:', L)
                print('seq_len:', seq_len)

                # For anomaly detection problem we reconstruct input data, so
                # targets and inputs are identical.
                feed_dict = {
                    model.inputs: X,
                    model.targets: X,
                    model.lengths: L,
                    model.dropout: self.dropout,
                    model.batch_size: self.batch_size,
                    model.max_seq_len: seq_len
                }

                fetches = [
                    model.loss, model.decoder_outputs, model.train_optimizer
                ]
                step_loss, _, _ = sess.run(fetches, feed_dict)

                total_loss.append(step_loss)
                timings.append(timeit.default_timer() - beg_t)

                print('total_loss:', total_loss)
                if step % steps_per_epoch == 0:
                    num_epoch += 1

                if step % 200 == 0 or step == 1:
                    print_progress(int(step / 200), num_epoch,
                                   np.mean(total_loss), np.mean(step_loss),
                                   np.sum(timings))
                    timings = []

                if step == 1:
                    _ = tf.train.export_meta_graph(
                        filename=self.path_to_graph + '.meta')

                if np.mean(total_loss) < min_loss or num_epoch > num_epochs:
                    model.saver.save(sess,
                                     self.path_to_graph,
                                     global_step=step)
                    print("Training is finished.")
                    break
Example #16
    def train(self,
              model,
              train_data,
              train_size,
              num_steps,
              num_epochs,
              min_loss=0.3):
        """
        Trains a given model architecture with given train data.
        """
        # global_step = tf.Variable(0, name='global_step', trainable=False)
        tf.set_random_seed(1234)
        saver = tf.train.Saver()

        with tf.Session() as sess:
            train_writer = tf.summary.FileWriter('./logs/1/train', sess.graph)
            try:
                saver.restore(sess,
                              tf.train.latest_checkpoint(self.checkpoints))
                # print("Model restore finished, current globle step: %d" % global_step.eval())
            except ValueError:
                print("Restoring failed.")
                sess.run(tf.global_variables_initializer())

            total_loss = []
            timings = []
            steps_per_epoch = int(train_size / self.batch_size)
            num_epoch = 1

            # last_step = global_step.eval(sess)
            # print("Last step: ", global_step.eval(sess))

            print("Start training.")

            for step in range(1, num_steps):
                beg_t = timeit.default_timer()
                X, L = train_data.__next__()
                seq_len = np.max(L)

                merge = tf.summary.merge_all()

                # For anomaly detection problem we reconstruct input data, so
                # targets and inputs are identical.
                feed_dict = {
                    model.inputs: X,
                    model.targets: X,
                    model.lengths: L,
                    model.dropout: self.dropout,
                    model.batch_size: self.batch_size,
                    model.max_seq_len: seq_len
                }
                fetches = [
                    merge, model.loss, model.decoder_outputs,
                    model.train_optimizer
                ]

                summary, step_loss, _, _ = sess.run(fetches, feed_dict)
                train_writer.add_summary(summary, step)

                total_loss.append(step_loss)
                timings.append(timeit.default_timer() - beg_t)

                if step % steps_per_epoch == 0:
                    num_epoch += 1
                    model.saver.save(sess,
                                     self.path_to_graph,
                                     global_step=step)
                    print("Save checkpoints.")

                if step % 20 == 0 or step == 1:
                    print_progress(step, num_epoch, np.mean(total_loss),
                                   np.mean(step_loss), np.sum(timings))
                    timings = []

                if step == 1:
                    _ = tf.train.export_meta_graph(
                        filename=self.path_to_graph + '.meta')

                if np.mean(total_loss) < min_loss or num_epoch > num_epochs:
                    model.saver.save(sess,
                                     self.path_to_graph,
                                     global_step=step)
                    print("Training is finished.")
                    break
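
Both train methods above pull batches from train_data as (X, L) pairs, where X is a padded batch of sequences and L holds the true sequence lengths. A minimal sketch of such a batch generator, assuming fixed-size batches of variable-length numeric sequences (the generator is not part of the original examples):

import numpy as np

def batch_generator(sequences, batch_size, num_features):
    # Yield (X, L) pairs: X is a zero-padded batch, L lists the true sequence lengths.
    while True:
        for start in range(0, len(sequences) - batch_size + 1, batch_size):
            batch = sequences[start:start + batch_size]
            lengths = [len(seq) for seq in batch]
            max_len = max(lengths)
            X = np.zeros((batch_size, max_len, num_features), dtype=np.float32)
            for i, seq in enumerate(batch):
                X[i, :len(seq), :] = seq
            yield X, lengths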