def eval(self):
    """Tell whether the tracked value moved by more than ``growth_percent``.

    The value is read through ``self.accessor_method`` for ``self.symbol_id``
    on the world's current day and on the day ``self.number_of_days`` earlier.
    With ``is_inc`` truthy only a rise counts, otherwise only a fall counts;
    the change is expressed as a percentage of the earlier value.  When any
    of the required settings is ``None`` the un-negated outcome is ``False``.

    Returns:
        The outcome, inverted when ``self.is_negated`` is truthy.
    """
    required = ("is_inc", "number_of_days", "growth_percent",
                "accessor_method", "symbol_id")
    outcome = False
    # The generator keeps the original left-to-right, short-circuit check.
    if not any(getattr(self, field) is None for field in required):
        today = dateConv.to_str(self.world.current_day)
        past_day = dateConv.get_date_str_back_x(today, self.number_of_days)
        value_now = self.accessor_method(self.symbol_id, today)
        value_then = self.accessor_method(self.symbol_id, past_day)
        if self.is_inc:
            if value_now > value_then:
                percent = (value_now - value_then) * 100 / value_then
                outcome = self.growth_percent < percent
        elif value_now < value_then:
            percent = (value_then - value_now) * 100 / value_then
            outcome = self.growth_percent < percent
    return (not outcome) if self.is_negated else outcome
def add_exchange_price(self, date_str, price):
    """Record ``price`` for ``date_str`` and keep the earliest known date.

    Args:
        date_str: day the price applies to, in the string form understood
            by ``dateConv.to_date``.
        price: value to store for that day.

    Raises:
        ValueError: if a price is already stored for ``date_str``.
    """
    if self.__exchange_prices.get(date_str) is not None:
        # The separator before "on the day of" was missing, which glued the
        # name and the rest of the sentence together.
        raise ValueError("Sir you are trying to redefine " + self.name +
                         " on the day of " + date_str)
    # Parse once, and before storing, so a date that cannot be parsed does
    # not leave a price behind in the table.
    parsed_date = dateConv.to_date(date_str)
    self.__exchange_prices[date_str] = price
    if self.__earliest_date is None or parsed_date < self.__earliest_date:
        self.__earliest_date = parsed_date
def get_exchange_rate(self, date_str):
    """Return the rate stored for ``date_str`` or for the closest earlier day.

    Days without a stored rate fall back to the previous day, one day at a
    time, until a rate is found or the earliest known date is passed.  The
    walk is a loop rather than a recursive call so that a long gap in the
    data cannot exhaust the interpreter's recursion limit.

    Args:
        date_str: requested day, in the string form understood by
            ``dateConv.to_date``.

    Raises:
        ValueError: if the requested day (or the fallback walk) lies before
            the earliest known date, or if no earliest date is set.
    """
    oldest = self.__earliest_date
    if oldest is None:
        # NOTE(review): presumably this means no rate was ever added; report
        # it as missing data instead of failing on a comparison with None.
        raise ValueError("Trying to obtain non existing data, Sir")
    day = dateConv.to_date(date_str)
    key = date_str
    while True:
        if day < oldest:
            raise ValueError("Trying to obtain non existing data, Sir")
        rate = self.__exchange_rates.get(key)
        if rate is not None:
            return rate
        day = day - timedelta(1)
        key = dateConv.to_str(day)
def parse_event(event_elem, symbol_table, file_name, counter):
    """Build an ``Event`` from an XML element with type/name/value/date children.

    Args:
        event_elem: element searched with ``find`` for the four children.
        symbol_table: provides ``get_currency`` / ``get_stock`` lookups that
            turn the event name into a symbol id.
        file_name: name of the file being parsed, used in error messages.
        counter: position of the event in the file, used in error messages.

    Returns:
        ``Event(type_text, date_text, symbol_id, float(value_text))``.

    Raises:
        ValueError: if a child element is missing or has no text, if the type
            is neither CURRENCY nor STOCK, if the date format is rejected by
            ``dateConv.is_date_str_valid_format``, or if the value is not a
            valid float.
    """
    children = [event_elem.find(tag)
                for tag in ('type', 'name', 'value', 'date')]
    # Compare with None explicitly: childless elements are falsy.
    if any(child is None for child in children):
        raise ValueError(
            "An event must consist of name, type, value, date elements, one is missing in "
            + file_name + " at " + str(counter) + " , Sir")
    # An element without text (<type/> or <type></type>) has text None, not
    # "", so test for falsiness to catch both spellings of "empty".
    if any(not child.text for child in children):
        raise ValueError(
            "An event must consist of name, type, value, date elements, one is empty in "
            + file_name + " at " + str(counter) + " , Sir")
    event_type, event_name, event_value, event_date = children

    if event_type.text == EventType.CURRENCY.name:
        symbol_id = symbol_table.get_currency(event_name.text)
    elif event_type.text == EventType.STOCK.name:
        symbol_id = symbol_table.get_stock(event_name.text)
    else:
        raise ValueError(
            "Terribly sorry Sir, but type must be CURRENCY or STOCK in "
            + file_name + " at " + str(counter))

    if not dateConv.is_date_str_valid_format(event_date.text):
        raise ValueError(
            "Terribly sorry Sir, but date format is incorrect. It should be yyyy.MM.dd in "
            + file_name + " at " + str(counter))

    return Event(event_type.text, event_date.text, symbol_id,
                 float(event_value.text))
def run_reality(self, date_start, date_stop):
    """Execute every rule once per day from ``date_start`` to ``date_stop``.

    Rules are executed in descending ``priority`` order.  The world's start
    date is set to ``date_start``; after each day's rules have run, the loop
    stops once the string form of the current day equals ``date_stop`` and
    otherwise advances with ``self.next_day()``.

    Args:
        date_start: first simulated day, as a date string.
        date_stop: last simulated day (inclusive), as a date string.

    Raises:
        ValueError: if ``date_stop`` is earlier than ``date_start``.
    """
    # The loop below only terminates on an exact match with date_stop, so a
    # stop date that lies before the start date would never be reached.
    if dC.to_date(date_stop) < dC.to_date(date_start):
        raise ValueError(
            "Date of simulation start should be before date of simulation end, Sir")

    ordered_rules = sorted(self.rules.values(),
                           key=lambda rule: rule.priority, reverse=True)
    self.world.set_start_date(date_start)
    while True:
        for rule in ordered_rules:
            rule.execute()
        if dC.to_str(self.world.current_day) == date_stop:
            break
        self.next_day()
def _get_date_arg(lexer, engine):
    """Read an optional date argument from ``lexer`` and return a date string.

    Accepted forms:
      * a whitespace token instead of a list start: the engine world's
        current day is used;
      * ``( number )``: the absolute value of the number is passed to
        ``dateConv.get_date_back_x`` together with the current day;
      * ``( date )``: the date token's value is used.

    Raises:
        ValueError: on any other first token, on a list argument that is
            neither a number nor a date, or on a missing closing token.
    """
    opening = lexer.get_token()
    if opening.token_type != TokenType.list_start:
        if opening.token_type != TokenType.whitespace:
            raise ValueError(
                "Forbidden token found, expected whitespace or ( amount"
                + str(opening.token_value) + " ,Sir.")
        return dateConv.to_str(engine.world.current_day)

    argument = utils.get_token_skipping_whitespace(lexer)
    if argument.token_type == TokenType.number:
        today = dateConv.to_str(engine.world.current_day)
        resolved = dateConv.get_date_back_x(today,
                                            math.fabs(argument.token_value))
    elif argument.token_type == TokenType.date:
        resolved = argument.token_value
    else:
        raise ValueError(" Expected Number or @date@, found neither, Sir")

    # Both argument kinds must be followed by the closing list token.
    closing = utils.get_token_skipping_whitespace(lexer)
    if closing.token_type != TokenType.list_end:
        raise ValueError("Expected ) , found: " + str(closing.token_value)
                         + " ,Sir.")
    return dateConv.to_str(resolved)
def test_getCorrectEvents(self):
    """Parse ``rule.txt`` against a small prepared world and print a priority.

    Sets up two currencies and two stocks in both the symbol table and the
    controller's world, registers a handful of events around 2016.05.16,
    parses the rule file and prints the priority of the rule stored under
    key ``1``.
    """
    symbol_table = SymbolsTable()
    symbol_table.add_currency('YUA')
    symbol_table.add_currency('USD')
    symbol_table.add_stock('CocaCola')
    symbol_table.add_stock('NukaCola')

    controller = RealityController()
    controller.world.current_day = dC.to_date('2016.05.16')
    controller.world.add_currency(Currency('yuan', 'YUA', 0))
    controller.world.add_currency(Currency('usa dollar', 'USD', 1))
    controller.world.add_stock(Stock('CocaCola', 1, 0))
    controller.world.add_stock(Stock('NukaCola', 1, 1))
    controller.add_event(Event(EventType.CURRENCY, '2016.05.16', 0, 500))
    controller.add_event(Event(EventType.CURRENCY, '2016.05.15', 0, 400))
    controller.add_event(Event(EventType.STOCK, '2016.05.16', 0, 500))
    controller.add_event(Event(EventType.STOCK, '2016.05.15', 0, 400))
    controller.add_event(Event(EventType.STOCK, '2016.05.14', 0, 400))
    controller.add_event(Event(EventType.STOCK, '2016.05.13', 0, 400))

    # Keep the file open only while the lexer is being consumed, and make
    # sure it is closed even if parsing fails.
    with open("rule.txt") as parsed_file:
        lexer = Lexer(parsed_file)
        Parser.RuleParser.parse_from_lexer(lexer, symbol_table, controller)

    rule = controller.rules.get(1)
    print(rule.priority)
# parse(args) -- extract personal details from a resume file.
#
# Python 2 code (print statements, dict.has_key).  The code below is kept
# exactly as found; only this header comment has been added.
#
# What the visible code does, in order:
#   * splits `args` on "/" and takes the last component as the file name,
#     which is logged;
#   * converts the document to text with DocumentConverter().convert(args)
#     and prints it;
#   * splits the text into blocks with BlockExtractor(text).extractBlocks();
#   * derives a probable name, years of experience and location from the
#     file name (getNameFromFileName, getYrsFromFileName, getLocation);
#   * walks the headers listed in `personal_info_sections`, cleans each
#     matching block with module-level regexes and scans it line by line for
#     e-mail, phone number (at least 10 digits), date of birth, name (via
#     nltk.pos_tag on lines of fewer than 7 words), gender, father's and
#     mother's name, marital status, nationality, languages, passport,
#     licence and PAN numbers;
#   * builds and returns `listHR`, a list of (label, value) tuples.
#
# NOTE(review): a bare `return` follows `print text` right after the
#   preprocessing step.  Its indentation cannot be recovered from this
#   source; if it sits at function level, everything after it is
#   unreachable and the function returns None -- confirm.
# NOTE(review): the "all extracted" condition tests `mothernamematchobject`
#   where every neighbouring term is an `...Extracted` flag
#   (`motherNameExtracted` looks intended) -- confirm.
# NOTE(review): one check uses
#   extracted_blocks["FIRST_SECTION_OF_RESUME"].upper() without the `[1]`
#   index that the other lookups of the same key use -- confirm.
# NOTE(review): the local variable `list` shadows the builtin, and
#   `tmeinextrname` is set to 0 and printed without being updated.
def parse(args): splitPath = args.split("/") if len(splitPath): fileName = splitPath[len(splitPath) - 1] logging.info("FILENAME : " + fileName) #PREPROCESSING text = DocumentConverter().convert(args) print text return #FIRST PASS #Extracted Blocks. extracted_blocks, section_blocks = BlockExtractor(text).extractBlocks() #Extract only the Personal Information Portion. personal_info_sections = [ "FIRST_SECTION_OF_RESUME", "PERSONAL INFORMATION", "PERSONAL DETAIL", "PERSONAL DETAILS", "PERSONAL PROFILES", "PERSONAL DOSSIER", "PERSONAL PROFILE", "PERSONAL VITAE", "PERSONAL DATA", "PERSONAL SNIPPETS", "CONTACT DETAILS", "PERSONAL PARTICULAR", "PERSONAL PARTICULARS", "PERSONAL INFORMAIION", "PERSONAL MEMORANDA", "OTHER DETAIL", "OTHER DETAILS" ] phoneNumberExtracted = False emailAddressExtracted = False nameExtracted = False dobExtracted = False allExtracted = False locationExtracted = False genderExtracted = False fatherNameExtracted = False motherNameExtracted = False maritalNameExtracted = False nationalityExtracted = False languagesExtracted = False passportExtracted = False licenseExtracted = False panExtracted = False addressExtracted = False probableNameFromFileName = None probableNameFromSection = None probableNameFromEmail = None probable_mobiles = [] probable_emails = [] probableName = "" possibleLocation = "" gender = "" fathername = "" mothername = "" maritalstate = "" nationality = "" languages = "" passportno = "" licenseno = "" panno = "" tmeinextrname = 0 probableNameFromFileName = getNameFromFileName(fileName) if probableNameFromFileName: splitname = probableNameFromFileName.split() if len(splitname) > 1: #Check whether the word is there in the Opening Section of the resume. 
if splitname[0].upper( ) in extracted_blocks["FIRST_SECTION_OF_RESUME"][1].upper(): if splitname[1].upper( ) in extracted_blocks["FIRST_SECTION_OF_RESUME"][1].upper(): probableName = " ".join( [splitname[0].upper(), splitname[1].upper()]) logging.info("NAME :" + probableNameFromFileName) else: probableNameFromFileName = splitname[0] else: if splitname[0].upper( ) not in extracted_blocks["FIRST_SECTION_OF_RESUME"].upper(): probableNameFromFileName = None #Years of Experience Extraction. possibleYrsOfExp = getYrsFromFileName(fileName) #Location from File_name. possibleLocation = getLocation(fileName, section_blocks) if possibleYrsOfExp: logging.info("YRS OF EXP :" + str(possibleYrsOfExp)) #Iterate over the possible section header paragraphs to find the 3 Nouns . for section in personal_info_sections: if allExtracted: break personal_info = "" if extracted_blocks.has_key(section): personal_info = extracted_blocks[section][1] #Second Pass personal_info = junkCharactersRemoveRegx.sub("", personal_info) #Remove common words not appearing in name, email, ph personal_info = extraHeadersRemoveRegx.sub("", personal_info) #Replace tabs and 4 white spaces with a new Line. 
personal_info = replaceTabsRegx.sub(" * ", personal_info) if extracted_blocks.has_key(section): if not addressExtracted: address = AddressExtraction( (section, extracted_blocks[section][1])).get_address() if address: addressExtracted = True #Iterate over each line of section and find name, email and ph for line in personal_info.split("\n"): line = line.strip() if not line: continue if not emailAddressExtracted: email = "" lineparts = notemailRegx.split(line) for linepart in lineparts: emailRes = emailRegx.search(linepart) if emailRes: email = emailRes.group().strip() if email: logging.info("Email : " + email) probable_emails.append(email) emailAddressExtracted = True if not phoneNumberExtracted: contact_number = "" lineparts = notphonenoRegx.split(line) for linepart in lineparts: noRes = phonenoRegx.search(linepart) if noRes: contact_number = noRes.group().strip() contact_number = removebrackethypenRegex.sub( "", contact_number).strip() #Find whether there is a balanced parenthesis if len(re.findall(r'\([^\)]*\)', contact_number)) == 0: #Replace all the parenthesis from the ends. 
contact_number = contact_number.replace( "(", "").replace(")", "") if len(re.findall(r'\d', contact_number)) >= 10: logging.info("NUMBER :" + contact_number) probable_mobiles.append(contact_number) phoneNumberExtracted = True if not dobExtracted: probable_dob = "" if "birth" in line.lower() and re.search( r'birth\W', line, re.IGNORECASE): probable_dob = dateOfBirthRearCleaningRegex.sub( "", dateOfBirthFrontCleaningRegex.sub("", line)) print probable_dob probable_dob = DateConverter(probable_dob).getDate() logging.info("STANDARD DOB :" + probable_dob) dobExtracted = True if "DOB" in line or "Dob" in line or "d.o.b" in line.lower( ) or "d-o-b" in line.lower(): probable_dob = dateOfBirthRearCleaningRegex.sub( "", dobFrontCleaningRegex.sub("", line)) print probable_dob probable_dob = DateConverter(probable_dob).getDate() logging.info("STANDARD DOB :" + probable_dob) dobExtracted = True #Name Extraction. # The general assumption is that the Name of the applicant occurs in First Few Lines of a Resume. # We consider lines less than 6 words , As name cannot be more than six. # We POS Tag it and we select the first 3 consective NN* words . 
if not nameExtracted: probableNameFromSection = "" junkStrippedline = removeJunktillAphaRegx.sub("", line) words = junkStrippedline.split() if len(words) < 7: #tag words taggedWords = nltk.pos_tag(words) if len(taggedWords) > 1 and taggedWords[1][1].startswith( "N"): if len(taggedWords ) > 2 and taggedWords[2][1].startswith("N"): probableNameFromSection = taggedWords[0][ 0] + " " + taggedWords[1][ 0] + " " + taggedWords[2][0] else: probableNameFromSection = taggedWords[0][ 0] + " " + taggedWords[1][0] else: if len(taggedWords ) > 0 and taggedWords[0][1].startswith("N"): probableNameFromSection = taggedWords[0][0] probableNameFromSection = probableNameFromSection.strip() if probableNameFromSection: probableNameFromSection = re.sub( r'\W', " ", probableNameFromSection).replace(" ", " ") nameExtracted = True if not genderExtracted: gendermatchobject = GenderRegx.match(line) if gendermatchobject: gender = gendermatchobject.group(3).strip() genderExtracted = True if not fatherNameExtracted: fathernamematchobject = FatherNameRegx.match(line) if fathernamematchobject: fathername = fathernamematchobject.group(2).strip() fatherNameExtracted = True if not motherNameExtracted: mothernamematchobject = MotherNameRegx.match(line) if mothernamematchobject: mothername = mothernamematchobject.group(2).strip() motherNameExtracted = True if not maritalNameExtracted: maritalstatusobject = MaritalNameRegx.match(line) if maritalstatusobject: maritalstate = maritalstatusobject.group(2).strip() maritalNameExtracted = True if not nationalityExtracted: nationalityobject = NationalityNameRegx.match(line) if nationalityobject: nationality = nationalityobject.group(2).strip() nationalityExtracted = True if not languagesExtracted: languagesobject = LanguagesRegx.match(line) if languagesobject: languages = languagesobject.group(2).strip() languagesExtracted = True if not passportExtracted: passportobject = PassportRegx.match(line) if passportobject: passportno = passportobject.group(2).strip() 
passportExtracted = True if not licenseExtracted: licenseobject = LicenceRegx.match(line) if licenseobject: licenseno = licenseobject.group(2).strip() licenseExtracted = True if not panExtracted: panobject = PanNumberRegx.match(line) if panobject: panno = panobject.group(2).strip() panExtracted = True if phoneNumberExtracted and emailAddressExtracted and nameExtracted and dobExtracted and genderExtracted and fatherNameExtracted and mothernamematchobject and maritalNameExtracted and nationalityExtracted and languagesExtracted and passportExtracted and licenseExtracted and panExtracted: allExtracted = True break #Create List. listHR = [] if emailAddressExtracted and probable_emails: try: probableNameFromEmail = re.match(r'(.*)@.*', probable_emails[0]).group(1) except Exception as e: probableNameFromEmail = probable_emails[0] probableNameFromEmail = re.sub( r'\s+', " ", re.sub(r'\W', " ", probableNameFromEmail)) nameExtracted = True if nameExtracted: list = decideName(probableNameFromFileName, probableNameFromSection, probableNameFromEmail) print list print "TIME TO EXTRACT NAME(sec):\t", tmeinextrname FirstName = list[0] LastName = "" if len(list) > 1: LastName = " ".join(list[1:]) listHR.append(("GivenName", FirstName)) if LastName: listHR.append(("FamilyName", LastName)) if emailAddressExtracted: if probable_emails: listHR.append(("Email", ','.join(probable_emails))) if phoneNumberExtracted: if probable_mobiles: listHR.append(("Mobile", ','.join(probable_mobiles))) if dobExtracted: listHR.append(("DateOfBirth", probable_dob)) if possibleYrsOfExp: listHR.append(("YrsOfExp", str(possibleYrsOfExp))) if possibleLocation: listHR.append(("Location", possibleLocation)) if gender: listHR.append(("gender", str(gender))) if fathername: listHR.append(("FatherName", str(fathername))) if mothername: listHR.append(("MotherName", str(mothername))) if maritalstate: listHR.append(("MaritalState", str(maritalstate))) if nationality: listHR.append(("Nationality", str(nationality))) 
if languages: listHR.append(("Languages", str(languages))) if passportno: listHR.append(("PassportNo", str(passportno))) if licenseno: listHR.append(("LicenseNo", str(licenseno))) if panno: listHR.append(("PanNo", str(panno))) return listHR
def get_stock_price_now(self, symbol_id):
    """Return the stock price of ``symbol_id`` for the current day.

    Delegates to ``self.get_stock_price`` with ``self.current_day``
    converted to its string form.
    """
    today = dateConv.to_str(self.current_day)
    return self.get_stock_price(symbol_id, today)
def get_currency_rate_now(self, symbol_id):
    """Return the currency rate of ``symbol_id`` for the current day.

    Delegates to ``self.get_currency_rate`` with ``self.current_day``
    converted to its string form.
    """
    today = dateConv.to_str(self.current_day)
    return self.get_currency_rate(symbol_id, today)
def next_day(self):
    """Advance ``self.current_day`` to the day ``dateConv.next_day`` returns.

    ``dateConv.next_day`` is given the string form of the current day.
    """
    today = dateConv.to_str(self.current_day)
    self.current_day = dateConv.next_day(today)
def set_start_date(self, start_date_str):
    """Set ``self.current_day`` from the date string ``start_date_str``.

    The string is converted with ``dateConv.to_date``.
    """
    first_day = dateConv.to_date(start_date_str)
    self.current_day = first_day
import sys

from Engine.Engine import Engine
import Utils.DateConverter as dateConv

# Command-line entry point: <rules file> <start date> <end date>.
# Validates both dates, then runs the engine over the rules file.

if len(sys.argv) < 4:
    # Fail with a readable message instead of an IndexError on sys.argv.
    raise ValueError(
        'Expected three arguments: rules file, start date and end date, Sir'
    )

file_name, start_date, end_date = sys.argv[1:4]

if not dateConv.is_date_str_valid_format(start_date):
    raise ValueError(
        'Date format for start date is not acceptable Sir. It should be yyyy.MM.dd'
    )
if not dateConv.is_date_str_valid_format(end_date):
    raise ValueError(
        'Date format for end date is not acceptable Sir. It should be yyyy.MM.dd'
    )
# `after` lives in Utils.DateConverter (not visible here); the check is kept
# exactly as it was written.
if not dateConv.after(start_date, end_date):
    raise ValueError(
        'Date of simulation start should be before date of simulation end, Sir'
    )

# Hold the rules file open for the whole run and close it afterwards, even
# when the engine raises.
with open(file_name) as parsed_file:
    engine = Engine(start_date, end_date, parsed_file)
    engine.invest()