def main():
    """Decode point names against a decoder dictionary and report parse stats.

    Usage: <script> <decoder_json> <point_list_json>
    Both files are read from data/csv_descriptions/. For each point name in the
    point list, attempts to decode it; prints the tag name for each successful
    decode, then a summary line: parsed count, total minus unparseable, total,
    and the two corresponding success percentages.
    """
    # FIX: removed the unused nameLookUpDictionary local and the
    # commented-out debug prints from the original.
    with open(get_data_resource("csv_descriptions/" + sys.argv[1]), 'r') as json_file:
        json_dict = json.load(json_file)
    with open(get_data_resource("csv_descriptions/" + sys.argv[2]), 'r') as point_list_file:
        point_list = json.load(point_list_file)

    counter = 0
    parsed = 0
    unparseable = 0
    # Only the keys (point names) are needed; the original iterated .items()
    # and discarded the values.
    for point_name in point_list:
        counter += 1
        tags_set = decodeName(point_name, json_dict)
        # FIX: `!= None` replaced with the idiomatic identity test.
        if tags_set is not None:
            if tags_set == "Not quite":  # sentinel from decodeName: recognized but not decodable
                unparseable += 1
            else:
                parsed += 1
                print(point_name, tagName(point_name, json_dict))

    # NOTE(review): raises ZeroDivisionError if the point list is empty or
    # every point is unparseable — acceptable for a diagnostic script, but
    # guard here if that input is expected.
    print(parsed, counter - unparseable, counter,
          float(parsed * 100) / float(counter - unparseable),
          float(parsed * 100) / float(counter))
def transform_all_files(directory_path=None):
    """
    Loops over every .csv file in *directory_path* and calls transform_file on it.

    :param directory_path: Directory to scan; defaults to data/csv_files.
    :return: List with one transform_file result per .csv file found
             (outputs better files in data/better_csv_files).
    """
    if not directory_path:
        directory_path = get_data_resource("csv_files")
    # BUG FIX: the original listed files from directory_path but then rebuilt
    # each path from the default "csv_files" resource directory, silently
    # ignoring a caller-supplied directory_path. Join against the directory
    # that was actually listed.
    return [
        transform_file(os.path.join(directory_path, file_name))
        for file_name in os.listdir(directory_path)
        if file_name.endswith(".csv")
    ]
def __init__(self, input_stream):
    """
    Load Siemens CSV data and the point-decoder tag dictionary.

    :param input_stream: Path or file-like object accepted by pandas.read_csv.
    """
    self.source = Sources.SIEMENS
    self.db_connection = DatabaseConnection()
    self.siemens_data = pd.read_csv(input_stream)
    # FIX: the original opened PointDecoder.json and never closed it,
    # leaking the file handle; a context manager guarantees closure.
    with open(get_data_resource("csv_descriptions/PointDecoder.json")) as tag_json:
        self.tag_dict = json_load(tag_json)
    # point name -> database id, filled in later by the reader.
    self.points_with_ids = {}
def test_transform_all_files_all_transformations_appear(self):
    """Every .csv file in data/csv_files should yield one transformation result."""
    pre_transform = [
        file_name
        for file_name in os.listdir(get_data_resource("csv_files"))
        if file_name.endswith(".csv")
    ]
    results = siemens_parser.transform_all_files()
    # IDIOM FIX: assertEqual reports both values on failure, unlike
    # assertTrue(a == b) which only says "False is not true".
    self.assertEqual(len(results), len(pre_transform))
def test_transform_file(self):
    """Transforming a known file should strip the raw "Point" header text."""
    file_name = "HULINGS.AUDIT.TRENDRPT1_171016.csv"
    new_csv = siemens_parser.transform_file(
        get_data_resource("csv_files/" + file_name))
    contents = " ".join(new_csv.readlines())
    # IDIOM FIX: assertNotIn shows the container text on failure, unlike
    # assertTrue("x" not in y).
    self.assertNotIn("Point", contents)
def main(csv_file):
    """
    Transform a single CSV file and insert all of its subpoints into the DB.

    :param csv_file: File name relative to data/csv_files/.
    :return: None
    """
    better_csv = transform_file(get_data_resource("csv_files/" + csv_file))
    reader = SiemensReader(better_csv)
    reader.add_to_db()
    reader.db_connection.close_connection()
def main():
    """
    Run `python3 -m src.datareaders.siemens.siemens_parser transform <file_name>`
    to transform a single CSV.
    If no commandline arguments --> Transforms all .csv files in data/csv_files/

    :return: None
    """
    if len(sys.argv) > 1 and sys.argv[1] == "transform":
        # CASE: transform the named CSV from the bad input to the better one.
        # CONSISTENCY FIX: every other call site in this module passes
        # transform_file a single full path; the original passed
        # (directory, file_name) as two separate arguments.
        transform_file(get_data_resource("csv_files/" + sys.argv[2]))
    else:
        # CASE: transforms every .csv file in csv_files to a better format
        transform_all_files()
        if point_value > 0:
            # Round point_value to 5 decimal places.
            point_value = round(point_value, 5)
            # Multiply point_value by 100000 to get as long int
            point_value *= 100000
        # NOTE(review): reconstructed from a collapsed source line — the
        # enclosing def/try open before this fragment; return placement
        # relative to the `if` is inferred. Confirm against the full file.
        return int(point_value)
    except:
        # NOTE(review): bare except maps *any* failure to the string
        # "Invalid"; consider catching (TypeError, ValueError) explicitly.
        return "Invalid"


def main(input_stream):
    """
    Initialize lucid_parser, then put data into correct tables in DB.

    :return: None
    """
    # NOTE(review): presumably LucidReader loads the stream on construction;
    # only the connection cleanup is explicit here — confirm.
    lucid_reader = LucidReader(input_stream)
    lucid_reader.db_connection.close_connection()


if __name__ == '__main__':
    if not sys.stdin.isatty():
        # we have a stdin so get our input stream from that
        main(sys.stdin.read())
    else:
        # No piped input: fall back to a default file name, overridable
        # by the first command-line argument.
        file_name = "DormData2013-18.csv"
        if len(sys.argv) > 1:
            file_name = sys.argv[1]
        path = get_data_resource("csv_files/" + file_name)
        with open(path, 'r') as input_stream:
            main(input_stream)