def get_categories(file_path):
    """
    Returns the field names of the first business record once the
    'categories' column has been transposed and the unwanted fields have
    been dropped.

    NOTE(review): presumably every remaining field is a category name;
    that depends on what ETLUtils.add_transpose_list_column and
    BusinessETL.drop_unwanted_fields do -- confirm against those helpers.

    :param file_path: the path for the file that contains the businesses
    data
    :return: the keys of the first processed business record
    """
    businesses = ETLUtils.add_transpose_list_column(
        'categories', ETLUtils.load_json_file(file_path))

    # Called only for its effect on the records; its return value is unused
    BusinessETL.drop_unwanted_fields(businesses)

    # Only the first record is inspected to obtain the field names
    first_business = businesses[0]
    return first_business.keys()
Example #2
0
def get_categories(file_path):
    """
    Returns the field names of the first business record once the
    'categories' column has been transposed and the unwanted fields have
    been dropped.

    NOTE(review): presumably every remaining field is a category name;
    that depends on what ETLUtils.add_transpose_list_column and
    BusinessETL.drop_unwanted_fields do -- confirm against those helpers.

    :param file_path: the path for the file that contains the businesses
    data
    :return: the keys of the first processed business record
    """
    businesses = ETLUtils.add_transpose_list_column(
        'categories', ETLUtils.load_json_file(file_path))

    # Called only for its effect on the records; its return value is unused
    BusinessETL.drop_unwanted_fields(businesses)

    # Only the first record is inspected to obtain the field names
    first_business = businesses[0]
    return first_business.keys()
Example #3
0
    def create_category_matrix(file_path):
        """
        Creates a matrix with all the categories for businesses that are
        contained in the Yelp Phoenix Business data set. Each column of the
        matrix represents a category, and each row a business. This is a binary
        matrix that contains a 1 at the position i,j if the business i contains
        the category j, and a 0 otherwise.

        Each row is built from the values of one record, in that record's own
        key order.
        NOTE(review): columns only line up across rows if every record holds
        the same keys in the same order -- confirm that
        ETLUtils.add_transpose_list_column guarantees this.

        :rtype : numpy array matrix
        :param file_path: the path for the file that contains the businesses
        data
        :return: a numpy array binary matrix
        """
        records = ETLUtils.load_json_file(file_path)

        # Now we obtain the categories for all the businesses
        records = ETLUtils.add_transpose_list_column('categories', records)
        BusinessETL.drop_unwanted_fields(records)

        # On Python 3 dict.values() is a view object; numpy.array would wrap
        # it as a single 0-d object element instead of a row of numbers.
        # Materialising it as a list gives one proper row per record, and is
        # harmless on Python 2 where values() already returns a list.
        matrix = numpy.array(
            [numpy.array(list(record.values())) for record in records])

        return matrix
Example #4
0
    def create_category_matrix(file_path):
        """
        Creates a matrix with all the categories for businesses that are
        contained in the Yelp Phoenix Business data set. Each column of the
        matrix represents a category, and each row a business. This is a binary
        matrix that contains a 1 at the position i,j if the business i contains
        the category j, and a 0 otherwise.

        Each row is built from the values of one record, in that record's own
        key order.
        NOTE(review): columns only line up across rows if every record holds
        the same keys in the same order -- confirm that
        ETLUtils.add_transpose_list_column guarantees this.

        :rtype : numpy array matrix
        :param file_path: the path for the file that contains the businesses
        data
        :return: a numpy array binary matrix
        """
        records = ETLUtils.load_json_file(file_path)

        # Now we obtain the categories for all the businesses
        records = ETLUtils.add_transpose_list_column('categories', records)
        BusinessETL.drop_unwanted_fields(records)

        # On Python 3 dict.values() is a view object; numpy.array would wrap
        # it as a single 0-d object element instead of a row of numbers.
        # Materialising it as a list gives one proper row per record, and is
        # harmless on Python 2 where values() already returns a list.
        matrix = numpy.array(
            [numpy.array(list(record.values())) for record in records])

        return matrix