def changeURLs(dataLines):
    """Return the data lines with every 'url' column rewritten from its brand.

    Each line is split into columns through ``Dataset.getColumns``, the
    'url' column is overwritten with ``Brand.brandsUrls[<brand column>]``,
    and the line is rebuilt through ``Dataset.getLine``.

    :param dataLines: raw lines handed to the ``Dataset`` constructor.
    :return: a new list holding one rebuilt line per entry of
        ``dataset.dataLines``, in the same order.

    A brand with no entry in ``Brand.brandsUrls`` makes the lookup fail
    (``KeyError`` if that table is a plain dict).
    """
    dataset = Dataset(dataLines)

    def withBrandUrl(rawLine):
        # Rebuild a single line with its URL taken from the brand table.
        fields = dataset.getColumns(rawLine)
        fields['url'] = Brand.brandsUrls[fields['brand']]
        return dataset.getLine(fields)

    return [withBrandUrl(rawLine) for rawLine in dataset.dataLines]
def fixSizeTypes(dataLines):
    """Return the data lines with spaces in 'size_type' turned into underscores.

    :param dataLines: raw lines handed to the ``Dataset`` constructor.
    :return: a new list holding one rebuilt line per entry of
        ``dataset.dataLines``, in the same order; only the 'size_type'
        column is modified.
    """
    dataset = Dataset(dataLines)
    result = []
    for rawLine in dataset.dataLines:
        fields = dataset.getColumns(rawLine)
        sizeType = fields['size_type']
        # Every space is replaced, not just the first one.
        fields['size_type'] = sizeType.replace(" ", "_")
        rebuilt = dataset.getLine(fields)
        result.append(rebuilt)
    return result
def mergeSynonymousSizeTypes(dataLines):
    """Return the data lines with synonymous size types merged.

    When a line's 'size_type' value is present in
    ``SizeType.mergedSizeTypes``, it is replaced by the value stored there;
    any other 'size_type' is left as it is.

    :param dataLines: raw lines handed to the ``Dataset`` constructor.
    :return: a new list holding one rebuilt line per entry of
        ``dataset.dataLines``, in the same order.
    """
    dataset = Dataset(dataLines)
    result = []
    for rawLine in dataset.dataLines:
        fields = dataset.getColumns(rawLine)
        sizeType = fields['size_type']
        if sizeType in SizeType.mergedSizeTypes:
            fields['size_type'] = SizeType.mergedSizeTypes[sizeType]
        # Every line is kept, whether or not its size type was merged.
        result.append(dataset.getLine(fields))
    return result
def doUpperCase(dataLines):
    """Return the data lines with the textual columns upper-cased.

    The columns 'size_type', 'label', 'brand', 'clothe_category',
    'size_category' and 'gender' are each replaced by their ``.upper()``
    form; no other column is touched by this function.

    :param dataLines: raw lines handed to the ``Dataset`` constructor.
    :return: a new list holding one rebuilt line per entry of
        ``dataset.dataLines``, in the same order.
    """
    # Processed in this fixed order for every line.
    upperCasedColumns = (
        'size_type',
        'label',
        'brand',
        'clothe_category',
        'size_category',
        'gender',
    )
    dataset = Dataset(dataLines)
    result = []
    for rawLine in dataset.dataLines:
        fields = dataset.getColumns(rawLine)
        for name in upperCasedColumns:
            fields[name] = fields[name].upper()
        result.append(dataset.getLine(fields))
    return result