def transform(self, dataDic):
    """Normalize the raw fields of one document dictionary in place.

    Each (key, value) pair present when the loop starts is inspected once
    and handled by the first matching rule:

    * key ends with ``"TimeText"``: the value is passed to
      ``DateFactory.getUnixTimestamp`` and the result is stored under the
      key with its trailing ``"Text"`` removed (``"createTimeText"`` ->
      ``"createTime"``).
    * key is listed in ``self.timeFields``: the value is replaced by
      ``self.getTimestamp(val)``.
    * key is ``"authorText"``: ``WriterFilter.getWriterAndEmail`` splits the
      value into a name and an e-mail; each one is stored as
      ``"authorName"`` / ``"authorEmail"`` only if it is truthy and that
      key is not already in the dictionary.
    * key ends with ``"Email"``: the value is replaced by
      ``self.getEmail(val)``.
    * key is ``"type"`` with value ``"NEWS"``: ``"sourceType"`` is set to 4.

    A missing ``"guid"`` is filled with the string ``"None"`` first, so the
    error log line can always name the document.

    Any exception raised while handling a single field is logged through
    ``self.logger`` and processing continues with the next field.

    :param dataDic: mutable mapping of field name -> raw value; modified
        in place.
    :returns: nothing is returned by this method; callers read the
        mutated ``dataDic``.
    """
    if "guid" not in dataDic:
        dataDic["guid"] = "None"

    dateFactory = DateFactory()
    writerFilter = WriterFilter()

    # Iterate over a snapshot of the items: the body inserts new keys, which
    # would raise "dictionary changed size during iteration" on a live dict
    # view. Python 2's items() already returned a copy, so only the pairs
    # present at loop start are visited, exactly as before.
    for key, val in list(dataDic.items()):
        try:
            # Time fields
            if key.endswith("TimeText"):
                # key[:-4] drops the trailing "Text"
                dataDic[key[:-4]] = dateFactory.getUnixTimestamp(val)
            elif key in self.timeFields:
                dataDic[key] = self.getTimestamp(val)
            # Numbers (unused)
            #elif key.endswith("Count") :
            #    dataDic[key[:-4]] = self.getNumber(val)
            #elif key in self.numberFields :
            #    dataDic[key] = self.getNumber(val)
            # Reporter (author) name
            elif key == "authorText":
                authorName, authorEmail = writerFilter.getWriterAndEmail(val)
                # Never overwrite an author name / e-mail that is already set.
                if authorName and ("authorName" not in dataDic):
                    dataDic["authorName"] = authorName
                if authorEmail and ("authorEmail" not in dataDic):
                    dataDic["authorEmail"] = authorEmail
            # Email
            elif key.endswith("Email"):
                dataDic[key] = self.getEmail(val)
            # Marker for media crawled by the news SC
            elif key == "type":
                if val == "NEWS":
                    dataDic["sourceType"] = 4
        except Exception as msg:
            # Best effort: one bad field must not abort the whole document.
            self.logger.error(
                "Field [%s] transform error at GUID [%s] : %s"
                % (key, dataDic["guid"], msg)
            )
#testData = TIME_DATEDATA # 2014 Mar 7 3:12:30 PM testData = WRITER_DATA # 박시우 인턴기자 #testData = "댓글 수 [3524]" # 직접지정 # 필드단위 TEST ------------------------------------------------------------------------------ writerFilter = WriterFilter() fieldTransformer = FieldTransformer() for inputStr in testData.split("\n") : if inputStr.strip() : # 테스트 할 필드만 주석 제거 print "INPUT STR : ",inputStr #print "getEmail : ",fieldTransformer.getEmail(inputStr) #print "getNumber : ",fieldTransformer.getNumber(inputStr) #print "getTimestamp : ",fieldTransformer.getTimestamp(inputStr) print "getWriterAndEmail : ",getListStr( writerFilter.getWriterAndEmail(inputStr) ) print "#######################################" # 문서단위 TEST ------------------------------------------------------------------------------ testDic = dict() testDic["createTimeText"] = "108명 읽음2개 덧글5 시간 전" testDic["replyCountText"] = "댓글 5" testDic["modifiedTimeText"] = "12/11/2013 10:30:48 PM" testDic["authorText"] = "박성준 기자 | [email protected]" #testDic["authorName"] = "박시우" testDic["companyEmail"] = "*****@*****.**" # 페이지 단위------------------------------------------------------------------------------ """