Example #1
0
    def __init__(self, pois, level=None):
        """
        Keep a reference to the POI table and optionally rewrite its categories.

        :param pois: dataframe with POIs and respective coordinates.
                     It is stored as-is (no copy); when ``level`` is given, its
                     'categories' column is overwritten on this same object.
        :param level: level of the category we want
                      (e.g. Shops & Services:Gas Stations -> level 0: Shops & Services).
                      Use it when a clean text file has to be produced for word2vec.
                      ``None`` (the default) leaves the categories untouched.
        """
        self._pois = pois
        self._level = level

        # No level requested: the categories stay exactly as they were given.
        if level is None:
            return

        raw_categories = list(pois['categories'])
        pois['categories'] = utils.select_category(raw_categories, level)
Example #2
0
    def from_csv(cls, input, sep='\t', category_column='categories', level=5):
        """
        Build an instance from a delimited text file of POIs.

        :param input: file (path or buffer) handed to ``pd.read_csv``.
        :param sep: field separator of the file.
        :param category_column: name of the column holding the raw categories;
                                it is cast to ``str`` before being processed.
        :param level: category level forwarded to ``utils.select_category``.
        :return: ``cls`` called with the rows whose derived "category" value is
                 not the string "nan".
        """
        # Load the foursquare dataset mapped on a particular grid.
        frame = pd.read_csv(input, sep=sep)
        frame[category_column] = frame[category_column].astype(str)

        # Derive one category per record from the raw category strings.
        raw_categories = list(frame[category_column])
        frame.loc[:, "category"] = utils.select_category(raw_categories, level)

        # Keep only the records that ended up with a category.
        has_category = frame["category"] != "nan"
        return cls(frame.loc[has_category])
Example #3
0
def cell_vector_representation(poi_grid, w2v_model, level, output_file, size):
    """
    Write one summed Word2Vec vector per grid cell to a tab-separated file.

    Takes as input a spatial grid with POIs for each cell, a Word2Vec model, and a level of detail.
    For each cell:
        Looks up each category in a cell for the given level in the W2V model, taking the corresponding vector representation
        Sums all the vectors
        Writes one line to the output file: the cell id followed by the components of the summed vector
    Categories missing from the model vocabulary are skipped; a cell with no
    known category gets a zero vector of length ``size``.

    :param poi_grid: input handed to ``csv_2_geodf``; the resulting frame must
                     provide the 'cellID' and 'categories' columns.
    :param w2v_model: saved model handed to ``gensim.models.Word2Vec.load``.
    :param level: category level forwarded to ``utils.select_category``.
    :param output_file: path of the file to write (opened in 'w' mode, so any
                        existing content is replaced).
    :param size: length of the zero vector used for empty cells (passed through ``int``).
    :return: None. The result is only written to ``output_file``.
    """
    # load shapefile of mapped POIs
    gdf = csv_2_geodf(poi_grid)

    # load w2v_model
    model = gensim.models.Word2Vec.load(w2v_model)
    # Since gensim 1.0 the word vectors live on `model.wv`, and gensim 4 removed
    # indexing the model itself (it raises TypeError, which the KeyError handler
    # below would not catch). Fall back to the model only for pre-1.0 releases.
    word_vectors = getattr(model, 'wv', model)

    # group every cell
    grouped_gdf = gdf.groupby('cellID')

    with open(output_file, 'w') as out:
        for cell, group in grouped_gdf:
            # Vectors are kept per cell only; nothing needs them after the
            # line is written, so they are not accumulated across cells.
            cell_vectors = []
            for categories_raw in group['categories']:
                # select level
                category = utils.select_category(categories_raw.split(':'),
                                                 level)[-1]
                # lookup category in w2v
                try:
                    vector = word_vectors[category]
                except KeyError:
                    # category is not in the model vocabulary: ignore it
                    continue
                cell_vectors.append(np.array(vector))

            if not cell_vectors:
                cell_vectors = [np.zeros(int(size))]

            # sum vectors
            sum_w = sum_vectors(cell_vectors)
            sum_w_str = "\t".join(map(str, sum_w))
            out.write(str(cell) + '\t' + sum_w_str + '\n')
Example #4
0
    def generate(self, model, area_based=True, strategy='avg'):
        """
        Write one summed Word2Vec vector per grid cell to a tab-separated file.

        For every 'cellID' group, each entry of the 'categories' column is
        reduced to a single category via ``utils.select_category``, looked up
        in the Word2Vec model, and the vectors found are summed. Categories
        missing from the model vocabulary are skipped; a cell with no known
        category gets a zero vector.

        TODO: create the NOT area based function

        NOTE(review): ``gdf``, ``output_file``, ``level`` and ``size`` are
        neither parameters nor locals of this method. Unless they resolve at
        module scope, the method raises NameError - confirm where they are
        meant to come from.

        :param model: saved model handed to ``gensim.models.Word2Vec.load``.
        :param area_based: currently unused by this body (see TODO).
        :param strategy: currently unused by this body; vectors are always summed.
        :return: None. The result is only written to ``output_file``.
        """

        # load w2v_model
        model = gensim.models.Word2Vec.load(model)
        # Since gensim 1.0 the word vectors live on `model.wv`, and gensim 4
        # removed indexing the model itself (it raises TypeError, which the
        # KeyError handler below would not catch). Fall back to the model only
        # for pre-1.0 releases.
        word_vectors = getattr(model, 'wv', model)

        # group every cell
        grouped_gdf = gdf.groupby('cellID')

        with open(output_file, 'w') as out:
            for cell, group in grouped_gdf:
                # Vectors are kept per cell only; nothing needs them after
                # the line is written.
                cell_vectors = []
                for categories_raw in group['categories']:
                    # select level
                    category = utils.select_category(
                        categories_raw.split(':'), level)[-1]
                    # lookup category in w2v
                    try:
                        vector = word_vectors[category]
                    except KeyError:
                        # category is not in the model vocabulary: ignore it
                        continue
                    cell_vectors.append(np.array(vector))

                if not cell_vectors:
                    cell_vectors = [np.zeros(int(size))]

                # sum vectors
                sum_w = sum_vectors(cell_vectors)
                sum_w_str = "\t".join(map(str, sum_w))
                out.write(str(cell) + '\t' + sum_w_str + '\n')