def fixFile(self, filePath):
    """Reformat a pipe-delimited statistics file into the canonical
    semicolon-delimited ``<country>_Spark_Statistics_<yymmdd>.csv`` file
    under ``<dirPath>/formatted/``.

    The first data row is parsed only to derive the output file name from
    its date.  NOTE(review): that first row is never written to the output
    file — confirm this is intentional (it looks like a data row, not a
    header).  An empty input file will raise here when ``inputRow`` is
    still ``None`` — same as the original behavior.
    """
    print(filePath)
    # Use context managers so the handles are closed even on error
    # (the previous version leaked both file objects).
    with open(filePath, "r") as inFile:
        csvReader = csv.reader(inFile, delimiter='|', quotechar='"')
        # Peek at the first row to derive the output file name.
        inputRow = None
        for row in csvReader:
            inputRow = self.createForwardRowV0(row)
            break
        date = DataReceiverUtil.stringToDate(inputRow["date"])
        newFileName = (self.country + "_Spark_Statistics_"
                       + date.strftime("%y%m%d") + ".csv")
        outPath = os.path.join(self.dirPath, "formatted", newFileName)
        with open(outPath, "w") as outFile:
            csvWriter = csv.writer(outFile, delimiter=';', quotechar='"',
                                   quoting=csv.QUOTE_NONE)
            # The reader continues from the second row onward.
            for row in csvReader:
                inputRow = self.createForwardRowV0(row)
                try:
                    csvWriter.writerow([
                        0,
                        inputRow["lob"],
                        inputRow["neid"],
                        inputRow["target"],
                        inputRow["dataSize"],
                        inputRow["date"],
                    ])
                except Exception as e:
                    # Best-effort: a single malformed row must not abort
                    # the whole file; report it and continue.
                    print(e)
def createForwardRowV1(self, row):
    """Map a raw forward CSV row (v1 layout) onto a flow dict.

    Columns used: 1=lob, 2 and 3 joined as flowName, 4=dataSize
    (left as-is), 5=date (parsed and converted to UTC).  The country
    comes from the parser instance, not the row.
    """
    parsed_date = DataReceiverUtil.stringToDate(row[5]).astimezone(utc)
    return {
        "country": self.country,
        "lob": row[1],
        "type": "forwards",
        "flowName": row[2] + ":" + row[3],
        "dataSize": row[4],
        "date": parsed_date,
    }
def createInputRowV0(self, row):
    """Map a raw input CSV row (v0 layout) onto a flow dict.

    Columns used: 1=country, 2=lob, 3=flowName, 5=dataSize (cast to
    int), 6=date (parsed and converted to UTC).
    """
    return {
        "country": row[1],
        "lob": row[2],
        "type": "inputs",
        "flowName": row[3],
        "dataSize": int(row[5]),
        "date": DataReceiverUtil.stringToDate(row[6]).astimezone(utc),
    }
def createInputRow(self, row):
    """Map a raw input CSV row onto a flow dict (current layout).

    Columns used: 1=country, 2=lob, 3=flowName, 5=dataSize, 6=date.
    NOTE(review): dataSize is kept as a string here while the v0 parser
    casts it to int — confirm which form downstream consumers expect.
    """
    inputRow = {}
    inputRow["country"] = row[1]
    inputRow["lob"] = row[2]
    inputRow["type"] = "inputs"
    inputRow["flowName"] = row[3]
    inputRow["dataSize"] = row[5]
    # BUG FIX: pytz timezones must be attached with localize().
    # datetime.replace(tzinfo=pytz.timezone(...)) picks the zone's raw
    # LMT offset instead of the proper CET/CEST rules.
    inputRow["date"] = pytz.timezone('CET').localize(
        util.stringToDate(row[6]))
    return inputRow
def createForwardRow(self, country, row):
    """Map a raw forward CSV row onto a flow dict (current layout).

    Columns used: 0=lob, 1=neid, 2=target (all stripped), 3=dataSize
    (stripped and cast to int), 5=date.  flowName is "<neid>:<target>".
    Note row[4] is deliberately unused — presumably a filler column;
    confirm against the feed format.
    """
    forward = {}
    forward["country"] = country
    forward["type"] = "forwards"
    forward["lob"] = row[0].strip()
    forward["neid"] = row[1].strip()
    forward["target"] = row[2].strip()
    forward["flowName"] = forward["neid"] + ":" + forward["target"]
    forward["dataSize"] = int(row[3].strip())
    # BUG FIX: use localize() — replace(tzinfo=pytz.timezone('CET'))
    # attaches the zone's raw LMT offset, not proper CET/CEST.
    forward["date"] = pytz.timezone('CET').localize(
        util.stringToDate(row[5]))
    return forward
import csv

import pytz

from mediation.data_receiver import DataReceiverConfig
from mediation.data_receiver import DataReceiverUtil

# Cut-off date: only records strictly newer than this are valid
# (see isValidDate below).
LATEST_DATE = DataReceiverUtil.stringToDate("20.02.16 00:00:00")
# Shared UTC tzinfo used when normalizing parsed row dates.
utc = pytz.timezone('UTC')


def isValidDate(d):
    """Return True when *d* is strictly newer than the LATEST_DATE cut-off."""
    return d > LATEST_DATE


def isValidFlow(flow):
    """Return True unless the flow's "<country>_<lob>" key is in the
    configured ignore list."""
    return flow["country"] + "_" + flow["lob"] not in DataReceiverConfig.IGNORE_LOBS


# Newest supported CSV layout version.
LATEST_VERSION = 1


class DataParser:
    """
    Parses a csv file and allows the caller to iterate over lists of
    10000 records.
    """

    def __init__(self, stream, type, country, version):
        # stream: open text stream containing semicolon-delimited CSV.
        # type: flow kind — presumably "inputs"/"forwards"; confirm with callers.
        self.batchSize = 10000
        self.reader = csv.reader(stream, delimiter=';', quotechar='"')
        self.type = type
        self.country = country
        # NOTE(review): *version* is accepted but not stored in this
        # visible chunk — the rest of the class is outside this view;
        # confirm whether it is used elsewhere.
import csv import pytz import mediation.data_receiver.DataReceiverConfig as config import mediation.data_receiver.DataReceiverUtil as util from common import AppConfig from .data_insertor import DataInsertor LATEST_DATE = util.stringToDate("20.02.16 00:00:00").replace(tzinfo=AppConfig.getTimezone()) def isValidFlow(flow): return flow["date"] > LATEST_DATE and flow["country"] in config.COUNTRIES and flow["lob"] not in config.IGNORE_LOBS """ deprecated """ class FileParser: def __init__(self): self.batchSize = 100000 def parseInputs(self, inputFile): inputsList = [] dataInsertor = DataInsertor() with open(inputFile, 'r') as csvfile: spamreader = csv.reader(csvfile, delimiter=';', quotechar='"') for row in spamreader: try: input = self.createInputRow(row) if isValidFlow(input):