def testFileSearch(self):
    self.assertEqual(file_name[1], dt.file_by_name(file_name[0], default_dir))
    self.assertEqual(file_names[1], dt.files_by_name(file_names[0], default_dir))
    self.assertEqual(file_extension1[1], dt.file_by_extension(file_extension1[0], default_dir))
    self.assertEqual(file_extension2[1], dt.file_by_extension(file_extension2[0], default_dir))
    self.assertEqual(file_extensions[1], dt.files_by_extension(file_extensions[0], default_dir))
    self.assertEqual(file_partial[1], dt.file_by_partial(file_partial[0], default_dir))
    self.assertEqual(file_partials[1], dt.files_by_partial(file_partials[0], default_dir))
    self.assertIsNone(dt.file_by_name(missing_file, sub_dir))
def main():

    print "setting up directories..."
    dt.clear_or_create(DIRECTORIES["temp"])
    dt.create_directory(DIRECTORIES["archives"])
    print "done setting up directories"

    ftype = ft.get_type(unpack_file)

    print "unpacking and flattening files..."
    unpack.unpack(unpack_file, DIRECTORIES["temp"])
    unpack.flatten_folder(DIRECTORIES["temp"])
    # flatten_folder could return a list of the files in the directory so that
    # we would not have to search the directory every time we need specific
    # files, since os.walk is slow on directories with large files
    # (see the sketch after this function)
    print "done unpacking and flattening"

    sp = SchemaProps(SCHEMA_URL)
    file_details = {"file": unpack_file,
                    "process_time": process_time,
                    "file_timestamp": file_timestamp}
    election_details = {}
    vip_id = None
    election_id = None

    print "converting to db style flat files...."
    if dt.file_by_name(CONFIG_FILE, DIRECTORIES["temp"]):
        file_details.update(process_config(DIRECTORIES["temp"], DIRECTORIES["temp"] + CONFIG_FILE, sp))
    # files_by_extension returns a list, so compare its length rather than the list itself
    if len(dt.files_by_extension(".txt", DIRECTORIES["temp"])) > 0:
        file_details.update(process_flatfiles(DIRECTORIES["temp"], sp))
    print "processing xml files..."
    xml_files = dt.files_by_extension(".xml", DIRECTORIES["temp"])
    if len(xml_files) >= 1:
        ftff.feed_to_db_files(DIRECTORIES["temp"], xml_files[0], sp.full_header_data("db"), sp.version)
        os.remove(xml_files[0])
        if "valid_files" in file_details:
            file_details["valid_files"].append(xml_files[0])
        else:
            file_details["valid_files"] = [xml_files[0]]
    print "done processing xml files"

    print "getting feed details..."
    db = EasySQL("localhost", "vip", "username", "password")
    try:
        with open(DIRECTORIES["temp"] + "source.txt", "r") as f:
            reader = csv.DictReader(f)
            row = reader.next()
            vip_id = row["vip_id"]
            election_details["vip_id"] = vip_id
        with open(DIRECTORIES["temp"] + "election.txt", "r") as f:
            reader = csv.DictReader(f)
            row = reader.next()
            election_details["election_date"] = row["date"]
            election_details["election_type"] = row["election_type"]
    except Exception:
        # if the feed details cannot be read, report whatever we have and stop
        er.report_summary(vip_id, election_id, file_details, election_details)
        return
    election_id = get_election_id(election_details, db)
    election_details["election_id"] = election_id
    print "done getting feed details"

    print "converting to full db files...."
    element_counts, error_data, warning_data = convert_to_db_files(vip_id, election_id, file_details["file_timestamp"], DIRECTORIES["temp"], sp)
    print "done converting to full db files"

    er.report_summary(vip_id, election_id, file_details, election_details, element_counts)
    if len(error_data) > 0:
        er.feed_issues(vip_id, file_details["file_timestamp"], error_data, "error")
    if len(warning_data) > 0:
        er.feed_issues(vip_id, file_details["file_timestamp"], warning_data, "warning")

    update_data(vip_id, election_id, file_details["file_timestamp"], db, element_counts, DIRECTORIES["temp"], DIRECTORIES["archives"])
    db_validations(vip_id, election_id, db, sp)
    generate_feed(file_details)
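# A minimal sketch of the idea in the comment above: have the flatten step
# return the list of flattened file paths so callers can reuse it instead of
# re-walking the temp directory for every lookup. The helper name and behavior
# here are assumptions for illustration, not the actual unpack module API.
import os
import shutil

def flatten_folder_with_listing(target_dir):
    flattened = []
    # move every file found in subdirectories up to the top level,
    # recording the resulting paths as we go
    for root, dirs, files in os.walk(target_dir):
        for fname in files:
            source = os.path.join(root, fname)
            destination = os.path.join(target_dir, fname)
            if source != destination:
                shutil.move(source, destination)
            flattened.append(destination)
    # remove the now-empty subdirectories, deepest first
    for root, dirs, files in os.walk(target_dir, topdown=False):
        for dname in dirs:
            os.rmdir(os.path.join(root, dname))
    return flattened

# main() could then keep the returned list, e.g.
#     temp_files = flatten_folder_with_listing(DIRECTORIES["temp"])
# and filter it in memory instead of calling dt.files_by_extension repeatedly.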