def generate_constant_csvs(level="Family Name"):
    """
    Build the constant CSV/directory artifacts, skipping any that already
    exist on disk.

    :param level: category level forwarded to generate_top_category_files
    """
    # (path, existence check, builder, human-readable kind) for each artifact.
    # The builder for the directory needs `level`, hence the lambda.
    targets = [
        ("top_category_files/", file_utils.folder_exists,
         lambda: generate_top_category_files(level), "directory"),
        ("top_category_strings.csv", file_utils.file_exists,
         generate_top_category_string_csv, "file"),
        ("brand_counts.csv", file_utils.file_exists,
         generate_brand_counts_csv, "file"),
    ]
    for path, exists, build, kind in targets:
        if not exists(path):
            build()
        else:
            print(path + " " + kind + " already exists")
Example #2
0
 def __download_image_file(self):
     """
     Fetch the image from self.image_url into self.download_path.

     Does nothing when the image file is already present locally.
     :return: the downloaded image file object, or None if the file
              already exists
     """
     # Guard clause: already downloaded, nothing to do.
     if file_utils.file_exists(self.image_file_path):
         return None
     logger.info('Downloading Image from - ' + self.image_url)
     return file_utils.download(self.image_url, self.download_path)
 def __download_image_file(self):
     """
     Fetch the image from self.image_url into self.download_path.

     Does nothing when the image file is already present locally.
     :return: the downloaded image file object, or None if the file
              already exists
     """
     # Guard clause: already downloaded, nothing to do.
     if file_utils.file_exists(self.image_file_path):
         return None
     logger.info('Downloading Image from - ' + self.image_url)
     return file_utils.download(self.image_url, self.download_path)
    def testFileExists(self):
        """
        Ensure file_utils.file_exists() returns true for self.tmpFile
        once that path exists on disk.

        NOTE(review): the path is created with os.makedirs, i.e. as a
        directory — presumably file_exists() treats an existing directory
        path as "exists" here; confirm against testFileIsDirectory, which
        expects False for a directory.
        """
        # Make sure the probed path actually exists before the check.
        if not os.path.exists(self.tmpFile):
            os.makedirs(self.tmpFile)

        result = file_utils.file_exists(self.tmpFile)
        self.assertTrue(result)
Example #5
0
    def testFileExists(self):
        """
        Ensure file_utils.file_exists() returns true for self.tmpFile
        once that path exists on disk.

        NOTE(review): the path is created with os.makedirs, i.e. as a
        directory — presumably file_exists() treats an existing directory
        path as "exists" here; confirm against testFileIsDirectory, which
        expects False for a directory.
        """
        # Make sure the probed path actually exists before the check.
        if not os.path.exists(self.tmpFile):
            os.makedirs(self.tmpFile)

        result = file_utils.file_exists(self.tmpFile)
        self.assertTrue(result)
Example #6
0
 def __get_image_file(self):
     """
     Return a file object for the image.

     If the image is already on disk it is opened and returned; otherwise
     the download directory is created (when needed) and the image is
     fetched via __download_image_file().
     :return: the image file object
     """
     if file_utils.file_exists(self.image_file_path):
         # Fix: this message was previously logged on the download branch,
         # where no existing file had been found.
         logger.info('Found existing image file')
         # NOTE(review): opened in text mode ('r'); images are binary data,
         # so 'rb' is likely intended — confirm callers before changing.
         return open(self.image_file_path, 'r')
     else:
         if not os.path.exists(self.download_path):
             os.makedirs(self.download_path)
         return self.__download_image_file()
 def __get_image_file(self):
     """
     Return a file object for the image.

     If the image is already on disk it is opened and returned; otherwise
     the download directory is created (when needed) and the image is
     fetched via __download_image_file().
     :return: the image file object
     """
     if file_utils.file_exists(self.image_file_path):
         # Fix: this message was previously logged on the download branch,
         # where no existing file had been found.
         logger.info('Found existing image file')
         # NOTE(review): opened in text mode ('r'); images are binary data,
         # so 'rb' is likely intended — confirm callers before changing.
         return open(self.image_file_path, 'r')
     else:
         if not os.path.exists(self.download_path):
             os.makedirs(self.download_path)
         return self.__download_image_file()
Example #8
0
def match_sites_dataframe(dataframe, matches_json="", top_n=5):
    '''
    Generates a dataframe of matched sites.
    matches_json is an optional parameter for saving and loading slow to generate
    description based matches.
    INPUTS:
     - dataframe
     - matches_json -- A string representing the filename of a json file containing old matches to speed up processing
     - top_n (int) -- Maximum amount of matches to return for each item
    OUTPUTS:
     - matches_df
    '''

    #Missing values should be represented by empty strings
    dataframe = dataframe.fillna(value="")

    #Ensure we have the correct columns
    dataframe = pandas.DataFrame(dataframe.to_dict("records"),
                                 columns=ALL_FIELDNAMES)

    #Fill any columns we just added with "-1" to mark it wasn't originally there
    dataframe = dataframe.fillna(value="-1")

    #Normalize every cell to a stripped string in a single pass
    #(previously two separate applymap passes: str() then a strip with a
    #redundant isinstance-style check — everything is a str after str()).
    dataframe = dataframe.applymap(lambda x: str(x).strip())

    if "Match Site" in dataframe.columns:
        #"-1" marks rows that were not present in the old output.
        ndf = dataframe[dataframe["Match Site"] == "-1"]
        if ndf.empty:
            #No new rows.
            return pandas.DataFrame()
        odf = dataframe[dataframe["Match Site"] != "-1"]
        old_rows = [] if odf.empty else odf.to_dict("records")
        new_rows = ndf.to_dict("records")
    else:
        new_rows = dataframe.to_dict("records")
        old_rows = []

    # Add a 'Description' field to new_rows
    site_rows = [{
        **row, "Description": row["Stock Description"]
    } for row in new_rows]
    old_site_rows = remove_duplicate_rows(old_rows)
    old_item_ids_to_rows = generate_item_ids_to_rows(old_rows)

    # Load cached description matches when available; otherwise compute
    # them (slow) and persist for the next run.
    desc_matches = {}
    if matches_json:
        if file_utils.file_exists(matches_json):
            desc_matches = file_utils.read_json(matches_json)
        else:
            desc_matches = match_by_description(site_rows, old_site_rows)
            file_utils.save_json(matches_json, desc_matches)

    matches_rows = match_sites(site_rows,
                               old_site_rows,
                               old_item_ids_to_rows,
                               desc_matches,
                               top_n=top_n)
    matches_df = pandas.DataFrame(matches_rows, columns=OUTPUT_FIELDNAMES)
    matches_df = matches_df.fillna(value="")
    return matches_df[OUTPUT_FIELDNAMES]
Example #9
0
        "Maximum amount of matches to return for each row. Default value is 5.",
        type=int,
        default=5)

    args = parser.parse_args()

    sites_rows = file_utils.read_csv(args.filename)
    output_file = args.output
    matches_json = args.match_data
    if not matches_json:
        matches_json = ""
    top_n = args.matches

    stime = time.time()

    if file_utils.file_exists(output_file):
        old_rows = file_utils.read_csv(output_file)
    else:
        old_rows = []

    ndf = pandas.DataFrame(sites_rows)
    odf = pandas.DataFrame(old_rows)
    all_columns = ndf.columns.union(odf.columns)
    ndf = ndf.reindex(columns=all_columns, fill_value="-1")
    odf = odf.reindex(columns=all_columns, fill_value="-1")
    df = pandas.concat([ndf, odf]).reset_index(drop=True)

    if output_file:
        matches_df = match_sites_dataframe(df,
                                           matches_json=matches_json,
                                           top_n=top_n)
 def testFileNotExist(self):
     """
     Verify that file_utils.file_exists() reports False for a path that
     is not present on disk.
     """
     self.assertFalse(file_utils.file_exists('/foo/bar.txt'))
 def testFileIsDirectory(self):
     """
     Verify that file_utils.file_exists() reports False when the path
     refers to a directory rather than a regular file.
     """
     self.assertFalse(file_utils.file_exists(self.tmpDir))
Example #12
0
 def testFileNotExist(self):
     """
     Verify that file_utils.file_exists() reports False for a path that
     is not present on disk.
     """
     self.assertFalse(file_utils.file_exists('/foo/bar.txt'))
Example #13
0
 def testFileIsDirectory(self):
     """
     Verify that file_utils.file_exists() reports False when the path
     refers to a directory rather than a regular file.
     """
     self.assertFalse(file_utils.file_exists(self.tmpDir))