def main():
    """
    Decode every point name in a point list and print parsing statistics.

    Two JSON files are read from the "csv_descriptions/" data resource:
    sys.argv[1] is loaded and handed to decodeName/tagName, and sys.argv[2]
    is the point list whose keys are the names to decode.

    For every name that decodes to something other than None or "Not quite",
    prints the name followed by tagName(name, ...). Finally prints, in order:
    the parsed count, the total minus the "Not quite" count, the total, and
    the parsed percentage relative to each of those two totals.
    :return: None
    """
    with open(get_data_resource("csv_descriptions/" + sys.argv[1]), 'r') as json_file:
        json_dict = json.load(json_file)

    with open(get_data_resource("csv_descriptions/" + sys.argv[2]), 'r') as point_list_file:
        point_list = json.load(point_list_file)

    counter = 0
    parsed = 0
    unparseable = 0
    # Only the keys (point names) are needed; the mapped values are ignored.
    for name in point_list:
        counter += 1
        tags_set = decodeName(name, json_dict)
        if tags_set is None:
            continue
        if tags_set == "Not quite":
            unparseable += 1
        else:
            parsed += 1
            print(name, tagName(name, json_dict))

    def percentage(part, whole):
        # An empty point list (or one where every name is "Not quite") would
        # otherwise divide by zero; report 0.0 for those cases instead.
        return float(part * 100) / float(whole) if whole else 0.0

    parseable = counter - unparseable
    print(parsed, parseable, counter,
          percentage(parsed, parseable), percentage(parsed, counter))
def transform_all_files(directory_path=None):
    """
    Call transform_file on every .csv file found in a directory.

    :param directory_path: directory to scan; when falsy, defaults to
        get_data_resource("csv_files")
    :return: list holding the transform_file result for each .csv file, in
        the order os.listdir reports them
    """
    if not directory_path:
        directory_path = get_data_resource("csv_files")

    # Build each path from the directory that was actually listed, so that a
    # caller-supplied directory_path is honoured rather than silently reading
    # the same file names out of the default "csv_files" resource.
    return [
        transform_file(os.path.join(directory_path, file_name))
        for file_name in os.listdir(directory_path)
        if file_name.endswith(".csv")
    ]
예제 #3
0
 def __init__(self, input_stream):
     """
     Set up the reader state from a CSV input.

     Opens a DatabaseConnection, loads input_stream with pandas.read_csv and
     loads the tag dictionary from csv_descriptions/PointDecoder.json.
     :param input_stream: anything pandas.read_csv accepts (path or file-like)
     """
     self.source = Sources.SIEMENS
     self.db_connection = DatabaseConnection()
     self.siemens_data = pd.read_csv(input_stream)
     # Use a context manager so the decoder file is closed as soon as it has
     # been parsed instead of staying open for the lifetime of the process.
     # NOTE(review): json_load is presumably an alias of json.load — confirm.
     with open(get_data_resource("csv_descriptions/PointDecoder.json")) as tag_json:
         self.tag_dict = json_load(tag_json)
     self.points_with_ids = {}
예제 #4
0
    def test_transform_all_files_all_transformations_appear(self):
        """transform_all_files returns one result per .csv file in csv_files."""
        pre_transform = [
            file_name
            for file_name in os.listdir(get_data_resource("csv_files"))
            if file_name.endswith(".csv")
        ]

        results = siemens_parser.transform_all_files()
        # assertEqual reports both lengths on failure, whereas
        # assertTrue(a == b) only says "False is not true".
        self.assertEqual(len(results), len(pre_transform))
예제 #5
0
    def test_transform_file(self):
        """The output of transform_file must not contain the text "Point"."""
        file_name = "HULINGS.AUDIT.TRENDRPT1_171016.csv"

        new_csv = siemens_parser.transform_file(get_data_resource("csv_files/" + file_name))

        contents = " ".join(new_csv.readlines())

        # assertNotIn shows the offending text on failure, unlike
        # assertTrue(... not in ...), which only reports a bare boolean.
        self.assertNotIn("Point", contents)
예제 #6
0
def main(csv_file):
    """
    Transform one CSV file and load it into the DB through SiemensReader.

    :param csv_file: file name, resolved under the "csv_files/" data resource
    :return: None
    """
    transformed_file = transform_file(get_data_resource("csv_files/" + csv_file))

    sr = SiemensReader(transformed_file)
    try:
        sr.add_to_db()
    finally:
        # Release the database connection even when add_to_db raises, so a
        # failed import does not leave the connection open.
        sr.db_connection.close_connection()
def main():
    """
    Command-line entry point for transforming CSV files.

    Run `python3 -m src.datareaders.siemens.siemens_parser transform <file_name>`
    to transform a single CSV from the csv_files data resource.
    If no command-line arguments are given, every .csv file in csv_files is
    transformed. Any first argument other than "transform" is ignored and
    nothing is transformed.
    :return: None
    :raises SystemExit: if "transform" is given without a file name
    """
    if len(sys.argv) > 1:
        # CASE: transform the CSV from the bad input to the better one
        if sys.argv[1] == "transform":
            if len(sys.argv) < 3:
                # Fail with a usage hint instead of an IndexError on argv[2].
                sys.exit("usage: python3 -m src.datareaders.siemens.siemens_parser "
                         "transform <file_name>")
            file_name = sys.argv[2]
            # transform_file is called with one full path everywhere else, so
            # pass the resolved path rather than (directory, file) separately.
            transform_file(get_data_resource("csv_files/" + file_name))

    # CASE: transforms every .csv file in csv_files to a better format
    else:
        transform_all_files()
            if point_value > 0:
                # Round point_value to 5 decimal places.
                point_value = round(point_value, 5)
                # Multiply point_value by 100000 to get as long int
                point_value *= 100000
            return int(point_value)
        except:
            return "Invalid"


def main(input_stream):
    """
    Build a LucidReader from the given input, then close its DB connection.

    Any loading of data into the DB presumably happens while LucidReader is
    constructed; that is not visible from here.
    :param input_stream: passed unchanged to the LucidReader constructor
    :return: None
    """
    reader = LucidReader(input_stream)
    connection = reader.db_connection
    connection.close_connection()


if __name__ == '__main__':
    if sys.stdin.isatty():
        # Interactive terminal: read a CSV from the csv_files data resource,
        # named by the first command-line argument when one is supplied.
        file_name = sys.argv[1] if len(sys.argv) > 1 else "DormData2013-18.csv"
        with open(get_data_resource("csv_files/" + file_name), 'r') as input_stream:
            main(input_stream)
    else:
        # Data is being piped in: hand the whole of stdin to main.
        # NOTE(review): this branch passes a str while the branch above
        # passes an open file object — confirm LucidReader accepts both.
        main(sys.stdin.read())