def transform(self, dataDic):
    """Normalise selected fields of ``dataDic`` in place.

    Each key/value pair present when the loop starts is inspected once and,
    depending on the key, rewritten or used to derive a new entry:

    * ``<name>TimeText`` -> ``<name>Time`` is set from
      ``DateFactory.getUnixTimestamp(val)`` (the trailing ``Text`` is dropped).
    * keys listed in ``self.timeFields`` are replaced by
      ``self.getTimestamp(val)``.
    * ``authorText`` is split with ``WriterFilter.getWriterAndEmail``; the
      resulting name / e-mail fill ``authorName`` / ``authorEmail`` only when
      those keys are not already present.
    * any other key ending in ``Email`` is replaced by ``self.getEmail(val)``.
    * ``type == "NEWS"`` sets ``sourceType`` to 4.

    A missing ``guid`` is filled with the string ``"None"`` so that error
    messages can always reference it.

    Args:
        dataDic: mutable mapping of field name to raw value; it is updated
            in place.

    Errors raised while converting a single field are logged through
    ``self.logger`` and do not stop the remaining fields from being
    processed.
    """
    if "guid" not in dataDic:
        dataDic["guid"] = "None"

    dateFactory = DateFactory()
    writerFilter = WriterFilter()

    # Iterate over a snapshot: the branches below add keys to dataDic, which
    # would raise "dictionary changed size during iteration" on a live view.
    # Keys added during the loop are intentionally not revisited.
    for key, val in list(dataDic.items()):
        try:
            # Time fields
            if key.endswith("TimeText"):
                # "<name>TimeText" -> "<name>Time"
                dataDic[key[:-4]] = dateFactory.getUnixTimestamp(val)
            elif key in self.timeFields:
                dataDic[key] = self.getTimestamp(val)

            # Numbers (currently unused)
            #elif key.endswith("Count") :
            #    dataDic[key[:-4]] = self.getNumber(val)
            #elif key in self.numberFields :
            #    dataDic[key] = self.getNumber(val)

            # Reporter (author) name
            elif key == "authorText":
                authorName, authorEmail = writerFilter.getWriterAndEmail(val)
                # Never overwrite values that were supplied explicitly.
                if authorName and ("authorName" not in dataDic):
                    dataDic["authorName"] = authorName
                if authorEmail and ("authorEmail" not in dataDic):
                    dataDic["authorEmail"] = authorEmail

            # Email
            elif key.endswith("Email"):
                dataDic[key] = self.getEmail(val)

            # News SC: mark crawled-media documents
            # NOTE(review): the meaning of sourceType 4 is not visible here.
            elif key == "type":
                if val == "NEWS":
                    dataDic["sourceType"] = 4
        except Exception as msg:
            # Best effort: report the failing field and continue with the rest.
            self.logger.error("Field [%s] transform error at GUID [%s] : %s"%(key, dataDic["guid"], msg) )
from resource_writer import WRITER_DATA import optparse cmdParser = optparse.OptionParser(usage="") (cmdOptions, cmdArgs) = cmdParser.parse_args() # TEST 할 데이터만 주석제거 -------------------------------------------------------------- #testData = TIME_AGODATA # 한달 전 #testData = TIME_DATEDATA # 2014 Mar 7 3:12:30 PM testData = WRITER_DATA # 박시우 인턴기자 #testData = "댓글 수 [3524]" # 직접지정 # 필드단위 TEST ------------------------------------------------------------------------------ writerFilter = WriterFilter() fieldTransformer = FieldTransformer() for inputStr in testData.split("\n") : if inputStr.strip() : # 테스트 할 필드만 주석 제거 print "INPUT STR : ",inputStr #print "getEmail : ",fieldTransformer.getEmail(inputStr) #print "getNumber : ",fieldTransformer.getNumber(inputStr) #print "getTimestamp : ",fieldTransformer.getTimestamp(inputStr) print "getWriterAndEmail : ",getListStr( writerFilter.getWriterAndEmail(inputStr) ) print "#######################################" # 문서단위 TEST ------------------------------------------------------------------------------