예제 #1
0
 def fixFile(self, filePath):
     """Rewrite a '|'-delimited statistics file as a ';'-delimited copy.

     The first row is parsed with ``createForwardRowV0`` to obtain the date
     that names the output file ``<country>_Spark_Statistics_<yymmdd>.csv``
     inside ``<dirPath>/formatted``. Every following row is written as
     ``0;lob;neid;target;dataSize;date``.

     Both files are opened in ``with`` blocks so the handles are closed even
     when processing fails, and with ``newline=""`` as the ``csv`` module
     asks for. An empty input file is skipped instead of failing on the
     missing first row.

     :param filePath: path of the file to convert.
     """
     print(filePath)
     with open(filePath, "r", newline="") as source:
         csvReader = csv.reader(source, delimiter='|', quotechar='"')

         firstRow = next(csvReader, None)
         if firstRow is None:
             # Nothing to convert: without a first row there is no date to
             # build the output file name from.
             return

         # NOTE(review): the first row is consumed here to derive the date and
         # is therefore not written to the output (same as before this
         # change) - confirm that dropping it is intentional.
         inputRow = self.createForwardRowV0(firstRow)
         date = DataReceiverUtil.stringToDate(inputRow["date"])
         newFileName = self.country + "_Spark_Statistics_" + date.strftime(
             "%y%m%d") + ".csv"
         targetPath = os.path.join(self.dirPath, "formatted", newFileName)

         with open(targetPath, "w", newline="") as target:
             csvWriter = csv.writer(target,
                                    delimiter=';',
                                    quotechar='"',
                                    quoting=csv.QUOTE_NONE)
             for row in csvReader:
                 inputRow = self.createForwardRowV0(row)
                 try:
                     csvWriter.writerow([
                         0, inputRow["lob"], inputRow["neid"],
                         inputRow["target"], inputRow["dataSize"],
                         inputRow["date"]
                     ])
                 except Exception as e:
                     # Best effort: report the row that could not be written
                     # and continue with the next one.
                     print(e)
예제 #2
0
 def createForwardRowV1(self, row):
   """Build the dict describing one "forwards" record from a parsed csv row.

   The country is taken from ``self.country``; columns 1 to 5 of ``row``
   supply the lob, the two halves of the flow name (joined with ":"), the
   data size (kept exactly as read) and the date string, which is parsed
   and passed through ``astimezone(utc)``.
   """
   return {
     "country": self.country,
     "lob": row[1],
     "type": "forwards",
     "flowName": row[2] + ":" + row[3],
     "dataSize": row[4],
     "date": DataReceiverUtil.stringToDate(row[5]).astimezone(utc),
   }
예제 #3
0
 def createInputRowV0(self, row):
   """Build the dict describing one "inputs" record from a parsed csv row.

   Columns 1, 2 and 3 of ``row`` supply country, lob and flow name, column 5
   the data size (converted to ``int``) and column 6 the date string, which
   is parsed and passed through ``astimezone(utc)``.
   """
   return {
     "country": row[1],
     "lob": row[2],
     "type": "inputs",
     "flowName": row[3],
     "dataSize": int(row[5]),
     "date": DataReceiverUtil.stringToDate(row[6]).astimezone(utc),
   }
예제 #4
0
 def createInputRow(self, row):
   """Build the dict describing one "inputs" record from a parsed csv row.

   Columns 1, 2 and 3 of ``row`` supply country, lob and flow name, column 5
   the data size (kept exactly as read) and column 6 the date string. The
   parsed date is interpreted as wall-clock time in the 'CET' zone.
   """
   # pytz zones have to be attached with localize(): passing a pytz zone to
   # datetime.replace(tzinfo=...) does not reliably select the UTC offset
   # that is valid for the given date (see the pytz documentation). Any
   # tzinfo already on the parsed value is dropped first, which is what the
   # former replace(tzinfo=...) call did to it as well.
   return {
     "country": row[1],
     "lob": row[2],
     "type": "inputs",
     "flowName": row[3],
     "dataSize": row[5],
     "date": pytz.timezone('CET').localize(
       util.stringToDate(row[6]).replace(tzinfo=None)),
   }
예제 #5
0
 def createForwardRow(self, country, row):
   """Build the dict describing one "forwards" record from a parsed csv row.

   Columns 0 to 3 of ``row`` are stripped of surrounding whitespace and
   supply lob, neid, target and the data size (converted to ``int``); the
   flow name is ``"<neid>:<target>"``. Column 5 holds the date string; the
   parsed date is interpreted as wall-clock time in the 'CET' zone.

   :param country: value stored under the "country" key.
   :param row: sequence of csv fields for one record.
   """
   lob = row[0].strip()
   neid = row[1].strip()
   target = row[2].strip()
   # pytz zones have to be attached with localize(): passing a pytz zone to
   # datetime.replace(tzinfo=...) does not reliably select the UTC offset
   # that is valid for the given date (see the pytz documentation). Any
   # tzinfo already on the parsed value is dropped first, which is what the
   # former replace(tzinfo=...) call did to it as well.
   return {
     "country": country,
     "type": "forwards",
     "lob": lob,
     "neid": neid,
     "target": target,
     "flowName": neid + ":" + target,
     "dataSize": int(row[3].strip()),
     "date": pytz.timezone('CET').localize(
       util.stringToDate(row[5]).replace(tzinfo=None)),
   }
예제 #6
0
import csv

import pytz

from mediation.data_receiver import DataReceiverConfig
from mediation.data_receiver import DataReceiverUtil

# Cut-off timestamp used by isValidDate: only dates strictly later than it pass.
# NOTE(review): how "20.02.16" splits into day/month/year depends on
# DataReceiverUtil.stringToDate - confirm.
LATEST_DATE = DataReceiverUtil.stringToDate("20.02.16 00:00:00")
# pytz timezone object for UTC.
utc = pytz.timezone('UTC')


def isValidDate(d):
  """Tell whether ``d`` lies strictly after the LATEST_DATE cut-off."""
  return LATEST_DATE < d


def isValidFlow(flow):
  """Tell whether the flow's "<country>_<lob>" key is absent from IGNORE_LOBS."""
  lobKey = flow["country"] + "_" + flow["lob"]
  return lobKey not in DataReceiverConfig.IGNORE_LOBS


# NOTE(review): presumably the newest supported input format version (compare
# the `version` argument of DataParser.__init__) - confirm.
LATEST_VERSION = 1


class DataParser:
  """
  Parses a csv file and allows the caller to iterate over lists of 10000 records.
  """
  def __init__(self, stream, type, country, version):
    # Presumably the length of each list handed to the caller (the class
    # docstring mentions lists of 10000 records) - confirm.
    self.batchSize = 10000
    # Rows are read from `stream` as ';'-separated fields quoted with '"'.
    self.reader = csv.reader(stream, delimiter=';', quotechar='"')
    self.type = type
    self.country = country
    # NOTE(review): `version` is not used in the lines visible here.
예제 #7
0
import csv

import pytz

import mediation.data_receiver.DataReceiverConfig as config
import mediation.data_receiver.DataReceiverUtil as util
from common import AppConfig
from .data_insertor import DataInsertor

# Cut-off timestamp: isValidFlow only accepts flows dated strictly after it.
# NOTE(review): if AppConfig.getTimezone() returns a pytz zone, replace(tzinfo=...)
# may attach the wrong UTC offset; pytz recommends tz.localize(...) - confirm.
LATEST_DATE = util.stringToDate("20.02.16 00:00:00").replace(tzinfo=AppConfig.getTimezone())


def isValidFlow(flow):
  """Tell whether a flow should be processed.

  A flow qualifies when its date is strictly after LATEST_DATE, its country
  is listed in config.COUNTRIES and its lob is not listed in
  config.IGNORE_LOBS. The checks run in that order and stop at the first
  one that fails.
  """
  if not flow["date"] > LATEST_DATE:
    return False
  if flow["country"] not in config.COUNTRIES:
    return False
  return flow["lob"] not in config.IGNORE_LOBS

"""
deprecated
"""
class FileParser:
  def __init__(self):
    self.batchSize = 100000

  def parseInputs(self, inputFile):
    inputsList = []
    dataInsertor = DataInsertor()
    with open(inputFile, 'r') as csvfile:
      spamreader = csv.reader(csvfile, delimiter=';', quotechar='"')
      for row in spamreader:
        try:
          input = self.createInputRow(row)
          if isValidFlow(input):