Example #1
0
    def eval(self):
        """Tell whether the tracked value moved by more than ``growth_percent``.

        The value is read through ``accessor_method`` for the current world
        day and for the day ``number_of_days`` earlier.  When ``is_inc`` is
        truthy a rise is measured, otherwise a fall; in both cases the move is
        expressed as a percentage of the earlier value.  If any of the
        configuration attributes is ``None`` the raw outcome is ``False``.

        Returns:
            The outcome, inverted when ``is_negated`` is truthy.

        NOTE(review): an earlier value of 0 combined with a qualifying move
        reaches a division by that value - confirm whether callers can hit it.
        """
        unconfigured = (self.is_inc is None
                        or self.number_of_days is None
                        or self.growth_percent is None
                        or self.accessor_method is None
                        or self.symbol_id is None)

        outcome = False
        if not unconfigured:
            today = dateConv.to_str(self.world.current_day)
            window_start = dateConv.get_date_str_back_x(
                today, self.number_of_days)
            latest = self.accessor_method(self.symbol_id, today)
            baseline = self.accessor_method(self.symbol_id, window_start)

            # Order the pair so that "higher must exceed lower" expresses both
            # the rising and the falling variant of the condition.
            if self.is_inc:
                higher, lower = latest, baseline
            else:
                higher, lower = baseline, latest

            if higher > lower:
                percent_moved = (higher - lower) * 100 / baseline
                outcome = self.growth_percent < percent_moved

        return (not outcome) if self.is_negated else outcome
Example #2
0
 def add_exchange_price(self, date_str, price):
     """Store ``price`` for the day ``date_str`` and track the earliest day.

     Args:
         date_str: Day key under which the price is stored; it is also
             converted with ``dateConv.to_date`` to maintain the earliest
             known date.
         price: Value to store for that day.

     Raises:
         ValueError: If a price is already stored for ``date_str``.
     """
     if self.__exchange_prices.get(date_str) is not None:
         # The separating space before "on" was missing, which glued the
         # name to the rest of the message.
         raise ValueError("Sir you are trying to redefine " + self.name
                          + " on the day of " + date_str)

     self.__exchange_prices[date_str] = price
     # Convert once and reuse for both the comparison and the assignment.
     added_date = dateConv.to_date(date_str)
     if self.__earliest_date is None or added_date < self.__earliest_date:
         self.__earliest_date = added_date
Example #3
0
 def get_exchange_rate(self, date_str):
     """Return the rate stored for ``date_str`` or for the closest earlier day.

     The lookup walks back one day at a time until a stored rate is found.
     It is done iteratively so that a long gap between stored days cannot
     exhaust the interpreter's recursion limit.

     Args:
         date_str: Day to look up, in the format understood by
             ``dateConv.to_date``.

     Returns:
         The stored rate of the requested day, or of the nearest preceding
         day that has one.

     Raises:
         ValueError: If no earliest date is known yet, or the walk reaches a
             day before the earliest known date.
     """
     earliest = self.__earliest_date
     if earliest is None:
         # Nothing has been stored yet; comparing a date against None would
         # fail with an unrelated error, so report missing data instead.
         raise ValueError("Trying to obtain non existing data, Sir")

     lookup_key = date_str
     day = dateConv.to_date(date_str)
     while True:
         if day < earliest:
             raise ValueError("Trying to obtain non existing data, Sir")
         rate = self.__exchange_rates.get(lookup_key)
         if rate is not None:
             return rate
         # No rate on this day: fall back to the previous calendar day.
         day = day - timedelta(1)
         lookup_key = dateConv.to_str(day)
Example #4
0
def parse_event(event_elem, symbol_table, file_name, counter):
    """Build an ``Event`` from an element holding type, name, value and date.

    Args:
        event_elem: Element exposing ``find(tag)``; the found children must
            expose ``.text`` (presumably an ElementTree element - confirm
            against the caller).
        symbol_table: Provides ``get_currency(name)`` and ``get_stock(name)``
            to resolve the symbol id.
        file_name: Name of the source file, used only in error messages.
        counter: Position of the event in the file, used only in error
            messages.

    Returns:
        ``Event(type_text, date_text, symbol_id, float(value_text))``.

    Raises:
        ValueError: If a child element is missing or has no text, the type is
            neither CURRENCY nor STOCK, or the date format is rejected by
            ``dateConv.is_date_str_valid_format``.  ``float`` may also raise
            ``ValueError`` for a non-numeric value.
    """
    location = file_name + " at " + str(counter)
    fields = [event_elem.find(tag) for tag in ('type', 'name', 'value', 'date')]

    # Compare with None explicitly: an ElementTree element without children
    # is falsy, so a plain truth test would misreport it as missing.
    if any(field is None for field in fields):
        raise ValueError(
            "An event must consist of name, type, value, date elements, one is missing in "
            + location + " , Sir")

    # An element with no content reports its text as None rather than "",
    # so test for "no text" instead of equality with the empty string.
    if any(not field.text for field in fields):
        raise ValueError(
            "An event must consist of name, type, value, date elements, one is empty in "
            + location + " , Sir")

    event_type, event_name, event_value, event_date = fields

    if event_type.text == EventType.CURRENCY.name:
        symbol_id = symbol_table.get_currency(event_name.text)
    elif event_type.text == EventType.STOCK.name:
        symbol_id = symbol_table.get_stock(event_name.text)
    else:
        raise ValueError(
            "Terribly sorry Sir, but type must be CURRENCY or STOCK in " +
            location)

    if not dateConv.is_date_str_valid_format(event_date.text):
        raise ValueError(
            "Terribly sorry Sir, but date format is incorrect. It should be yyyy.MM.dd in "
            + location)

    return Event(event_type.text, event_date.text, symbol_id,
                 float(event_value.text))
Example #5
0
 def run_reality(self, date_start, date_stop):
     """Execute every rule once per day from ``date_start`` to ``date_stop``.

     Rules run in descending ``priority`` order.  The rules of the stop day
     are executed before the loop ends.

     Args:
         date_start: Date string handed to ``world.set_start_date``.
         date_stop: Date string of the last simulated day, parsed with
             ``dC.to_date``.
     """
     ordered_rules = sorted(self.rules.values(),
                            key=lambda rule: rule.priority, reverse=True)
     last_day = dC.to_date(date_stop)
     self.world.set_start_date(date_start)
     while True:
         for rule in ordered_rules:
             rule.execute()
         # Compare dates with >= instead of testing string equality, so the
         # loop also ends when the stop day lies before the start day or is
         # written differently from what dC.to_str produces.  The current
         # day is normalised through to_str/to_date so both sides of the
         # comparison come from the same conversion.
         today = dC.to_date(dC.to_str(self.world.current_day))
         if today >= last_day:
             break
         self.next_day()
Example #6
0
def _get_date_arg(lexer, engine):
    """Read an optional date argument from ``lexer`` and return it as a string.

    Accepted forms, judged by the next token:
      * whitespace - no argument; the engine's current day is used;
      * list start, then a number, then list end - the current day moved
        back by the absolute value of that number;
      * list start, then a date token, then list end - that date.

    Raises:
        ValueError: For any other token sequence.
    """
    opener = lexer.get_token()

    if opener.token_type == TokenType.list_start:
        argument = utils.get_token_skipping_whitespace(lexer)
        if argument.token_type == TokenType.number:
            resolved = dateConv.get_date_back_x(
                dateConv.to_str(engine.world.current_day),
                math.fabs(argument.token_value))
        elif argument.token_type == TokenType.date:
            resolved = argument.token_value
        else:
            raise ValueError(" Expected Number or @date@, found neither, Sir")

        # Both argument kinds must be followed by the closing parenthesis.
        closer = utils.get_token_skipping_whitespace(lexer)
        if closer.token_type != TokenType.list_end:
            raise ValueError("Expected ) , found: " + str(closer.token_value) + " ,Sir.")
        return dateConv.to_str(resolved)

    if opener.token_type == TokenType.whitespace:
        return dateConv.to_str(engine.world.current_day)

    raise ValueError(
        "Forbidden token found, expected whitespace or ( amount" + str(opener.token_value) + " ,Sir.")
Example #7
0
 def test_getCorrectEvents(self):
     """Parse ``rule.txt`` against a small prepared world and print a priority.

     Sets up two currencies, two stocks and a handful of events, parses the
     rules file and prints the priority of the rule stored under key 1.
     NOTE(review): the test makes no assertions; it only fails if parsing
     raises or no rule is stored under key 1.
     """
     symbol_table = SymbolsTable()
     symbol_table.add_currency('YUA')
     symbol_table.add_currency('USD')
     symbol_table.add_stock('CocaCola')
     symbol_table.add_stock('NukaCola')

     controller = RealityController()
     controller.world.current_day = dC.to_date('2016.05.16')
     controller.world.add_currency(Currency('yuan', 'YUA', 0))
     controller.world.add_currency(Currency('usa dollar', 'USD', 1))
     controller.world.add_stock(Stock('CocaCola', 1, 0))
     controller.world.add_stock(Stock('NukaCola', 1, 1))
     controller.add_event(Event(EventType.CURRENCY, '2016.05.16', 0, 500))
     controller.add_event(Event(EventType.CURRENCY, '2016.05.15', 0, 400))
     controller.add_event(Event(EventType.STOCK, '2016.05.16', 0, 500))
     controller.add_event(Event(EventType.STOCK, '2016.05.15', 0, 400))
     controller.add_event(Event(EventType.STOCK, '2016.05.14', 0, 400))
     controller.add_event(Event(EventType.STOCK, '2016.05.13', 0, 400))

     # Open the rules file in a context manager so the handle is closed even
     # when parsing raises.
     with open("rule.txt") as parsed_file:
         lexer = Lexer(parsed_file)
         Parser.RuleParser.parse_from_lexer(lexer, symbol_table, controller)

     rule = controller.rules.get(1)
     print(rule.priority)
def parse(args):
    """Extract personal details from a resume document.

    NOTE(review): this function is written in Python 2 syntax (``print``
    statements, ``dict.has_key``).

    As written, the function prints the converted document text and returns
    ``None`` immediately after the preprocessing step; every statement after
    that ``return`` is unreachable.  This looks like leftover debugging code -
    confirm before relying on either behaviour.

    Args:
        args: Path of the document.  It is split on "/" to obtain the file
            name and passed unchanged to ``DocumentConverter().convert``.

    Returns:
        ``None`` as written.  The unreachable remainder builds and returns a
        list of ``(label, value)`` tuples such as ``("GivenName", ...)``,
        ``("Email", ...)`` and ``("Mobile", ...)``.
    """

    splitPath = args.split("/")
    # str.split always yields at least one element, so this guard holds
    # whenever args is a str and fileName is then the last path component.
    if len(splitPath):
        fileName = splitPath[len(splitPath) - 1]

    logging.info("FILENAME : " + fileName)
    #PREPROCESSING
    text = DocumentConverter().convert(args)
    # NOTE(review): the print/return pair below ends the function here; all
    # code after it never runs.
    print text
    return
    #FIRST PASS
    #Extracted Blocks.

    extracted_blocks, section_blocks = BlockExtractor(text).extractBlocks()

    #Extract only the Personal Information Portion.
    # Section headers that are searched, in this order, for personal details.
    personal_info_sections = [
        "FIRST_SECTION_OF_RESUME", "PERSONAL INFORMATION", "PERSONAL DETAIL",
        "PERSONAL DETAILS", "PERSONAL PROFILES", "PERSONAL DOSSIER",
        "PERSONAL PROFILE", "PERSONAL VITAE", "PERSONAL DATA",
        "PERSONAL SNIPPETS", "CONTACT DETAILS", "PERSONAL PARTICULAR",
        "PERSONAL PARTICULARS", "PERSONAL INFORMAIION", "PERSONAL MEMORANDA",
        "OTHER DETAIL", "OTHER DETAILS"
    ]

    # One flag per field; once a flag is set, that field is not searched for
    # again in later lines or sections.
    phoneNumberExtracted = False
    emailAddressExtracted = False
    nameExtracted = False
    dobExtracted = False
    allExtracted = False
    locationExtracted = False
    genderExtracted = False
    fatherNameExtracted = False
    motherNameExtracted = False
    maritalNameExtracted = False
    nationalityExtracted = False
    languagesExtracted = False
    passportExtracted = False
    licenseExtracted = False
    panExtracted = False
    addressExtracted = False
    probableNameFromFileName = None
    probableNameFromSection = None
    probableNameFromEmail = None
    probable_mobiles = []
    probable_emails = []
    probableName = ""
    possibleLocation = ""
    gender = ""
    fathername = ""
    mothername = ""
    maritalstate = ""
    nationality = ""
    languages = ""
    passportno = ""
    licenseno = ""
    panno = ""
    # Never updated below, so the name-extraction time printed later is
    # always 0.
    tmeinextrname = 0

    probableNameFromFileName = getNameFromFileName(fileName)

    if probableNameFromFileName:
        splitname = probableNameFromFileName.split()
        if len(splitname) > 1:
            #Check whether the word is there in the Opening Section of the resume.
            if splitname[0].upper(
            ) in extracted_blocks["FIRST_SECTION_OF_RESUME"][1].upper():
                if splitname[1].upper(
                ) in extracted_blocks["FIRST_SECTION_OF_RESUME"][1].upper():
                    probableName = " ".join(
                        [splitname[0].upper(), splitname[1].upper()])
                    logging.info("NAME :" + probableNameFromFileName)

                else:
                    # Only the first word was found in the opening section.
                    probableNameFromFileName = splitname[0]
        else:
            # NOTE(review): unlike the lookups above, .upper() is called on
            # the block entry itself rather than on its element [1] - confirm
            # whether the index is missing here.
            if splitname[0].upper(
            ) not in extracted_blocks["FIRST_SECTION_OF_RESUME"].upper():
                probableNameFromFileName = None

    #Years of Experience Extraction.
    possibleYrsOfExp = getYrsFromFileName(fileName)

    #Location from File_name.
    possibleLocation = getLocation(fileName, section_blocks)

    if possibleYrsOfExp:
        logging.info("YRS OF EXP :" + str(possibleYrsOfExp))

    #Iterate over the possible section header paragraphs to find the 3 Nouns .
    for section in personal_info_sections:
        if allExtracted:
            break

        # Sections missing from extracted_blocks are processed as empty text.
        personal_info = ""
        if extracted_blocks.has_key(section):
            personal_info = extracted_blocks[section][1]

        #Second Pass
        personal_info = junkCharactersRemoveRegx.sub("", personal_info)

        #Remove common words not appearing in name, email, ph
        personal_info = extraHeadersRemoveRegx.sub("", personal_info)

        #Replace whatever replaceTabsRegx matches with " * " (the original
        #note says tabs and runs of 4 white spaces - pattern not visible here).
        personal_info = replaceTabsRegx.sub(" * ", personal_info)

        if extracted_blocks.has_key(section):
            if not addressExtracted:
                # NOTE(review): address only drives addressExtracted; it is
                # never added to the returned list.
                address = AddressExtraction(
                    (section, extracted_blocks[section][1])).get_address()
                if address:
                    addressExtracted = True

        #Iterate over each line of section and find name, email and ph
        for line in personal_info.split("\n"):
            line = line.strip()

            if not line:
                continue

            if not emailAddressExtracted:
                email = ""
                lineparts = notemailRegx.split(line)
                # Every matching part of this line is collected, not just
                # the first one.
                for linepart in lineparts:
                    emailRes = emailRegx.search(linepart)
                    if emailRes:
                        email = emailRes.group().strip()
                        if email:
                            logging.info("Email : " + email)

                            probable_emails.append(email)
                            emailAddressExtracted = True

            if not phoneNumberExtracted:
                contact_number = ""
                lineparts = notphonenoRegx.split(line)

                for linepart in lineparts:
                    noRes = phonenoRegx.search(linepart)
                    if noRes:
                        contact_number = noRes.group().strip()
                        contact_number = removebrackethypenRegex.sub(
                            "", contact_number).strip()
                        #Find whether there is a balanced parenthesis
                        if len(re.findall(r'\([^\)]*\)', contact_number)) == 0:
                            #Replace all the parenthesis from the ends.
                            contact_number = contact_number.replace(
                                "(", "").replace(")", "")

                        # Accept the candidate only if it has at least 10 digits.
                        if len(re.findall(r'\d', contact_number)) >= 10:
                            logging.info("NUMBER :" + contact_number)
                            probable_mobiles.append(contact_number)
                            phoneNumberExtracted = True

            if not dobExtracted:
                probable_dob = ""
                # Form 1: the line contains "birth" followed by a non-word
                # character.
                if "birth" in line.lower() and re.search(
                        r'birth\W', line, re.IGNORECASE):
                    probable_dob = dateOfBirthRearCleaningRegex.sub(
                        "", dateOfBirthFrontCleaningRegex.sub("", line))
                    print probable_dob
                    probable_dob = DateConverter(probable_dob).getDate()
                    logging.info("STANDARD DOB :" + probable_dob)
                    dobExtracted = True

                # Form 2: a DOB abbreviation.  This is a separate "if", so a
                # line matching both forms is processed twice and the second
                # result wins.
                if "DOB" in line or "Dob" in line or "d.o.b" in line.lower(
                ) or "d-o-b" in line.lower():
                    probable_dob = dateOfBirthRearCleaningRegex.sub(
                        "", dobFrontCleaningRegex.sub("", line))
                    print probable_dob
                    probable_dob = DateConverter(probable_dob).getDate()
                    logging.info("STANDARD DOB :" + probable_dob)
                    dobExtracted = True

            #Name Extraction.
            # The general assumption is that the Name of the applicant occurs in First Few Lines of a Resume.
            # Only lines with fewer than 7 words are considered.
            # The words are POS tagged; the first word is combined with the
            # second (and third) word when their tags start with "N",
            # otherwise the first word alone is used if its tag starts with "N".
            if not nameExtracted:
                probableNameFromSection = ""
                junkStrippedline = removeJunktillAphaRegx.sub("", line)
                words = junkStrippedline.split()
                if len(words) < 7:

                    #tag words
                    taggedWords = nltk.pos_tag(words)

                    if len(taggedWords) > 1 and taggedWords[1][1].startswith(
                            "N"):
                        if len(taggedWords
                               ) > 2 and taggedWords[2][1].startswith("N"):
                            probableNameFromSection = taggedWords[0][
                                0] + " " + taggedWords[1][
                                    0] + " " + taggedWords[2][0]
                        else:
                            probableNameFromSection = taggedWords[0][
                                0] + " " + taggedWords[1][0]
                    else:

                        if len(taggedWords
                               ) > 0 and taggedWords[0][1].startswith("N"):
                            probableNameFromSection = taggedWords[0][0]

                    probableNameFromSection = probableNameFromSection.strip()
                    if probableNameFromSection:
                        # Non-word characters become spaces; one pass of
                        # double-space collapsing follows.
                        probableNameFromSection = re.sub(
                            r'\W', " ",
                            probableNameFromSection).replace("  ", " ")
                        nameExtracted = True

            # The remaining fields are each matched from the start of the
            # line with a dedicated regex; the captured group is the value.
            if not genderExtracted:

                gendermatchobject = GenderRegx.match(line)
                if gendermatchobject:
                    gender = gendermatchobject.group(3).strip()
                    genderExtracted = True

            if not fatherNameExtracted:
                fathernamematchobject = FatherNameRegx.match(line)
                if fathernamematchobject:
                    fathername = fathernamematchobject.group(2).strip()
                    fatherNameExtracted = True

            if not motherNameExtracted:

                mothernamematchobject = MotherNameRegx.match(line)
                if mothernamematchobject:
                    mothername = mothernamematchobject.group(2).strip()
                    motherNameExtracted = True

            if not maritalNameExtracted:
                maritalstatusobject = MaritalNameRegx.match(line)
                if maritalstatusobject:
                    maritalstate = maritalstatusobject.group(2).strip()
                    maritalNameExtracted = True

            if not nationalityExtracted:
                nationalityobject = NationalityNameRegx.match(line)
                if nationalityobject:
                    nationality = nationalityobject.group(2).strip()
                    nationalityExtracted = True

            if not languagesExtracted:
                languagesobject = LanguagesRegx.match(line)
                if languagesobject:
                    languages = languagesobject.group(2).strip()
                    languagesExtracted = True

            if not passportExtracted:
                passportobject = PassportRegx.match(line)
                if passportobject:
                    passportno = passportobject.group(2).strip()
                    passportExtracted = True

            if not licenseExtracted:
                licenseobject = LicenceRegx.match(line)
                if licenseobject:
                    licenseno = licenseobject.group(2).strip()
                    licenseExtracted = True

            if not panExtracted:
                panobject = PanNumberRegx.match(line)
                if panobject:
                    panno = panobject.group(2).strip()
                    panExtracted = True

            # NOTE(review): this condition tests mothernamematchobject (the
            # last match result) where every other field tests its
            # ...Extracted flag - motherNameExtracted was probably intended.
            if phoneNumberExtracted and emailAddressExtracted and nameExtracted and dobExtracted and genderExtracted and fatherNameExtracted and mothernamematchobject and maritalNameExtracted and nationalityExtracted and languagesExtracted and passportExtracted and licenseExtracted and panExtracted:
                allExtracted = True
                break
    #Create List.

    listHR = []

    if emailAddressExtracted and probable_emails:

        # Use the part before "@" of the first email as a name candidate,
        # falling back to the whole address if the match fails.
        try:
            probableNameFromEmail = re.match(r'(.*)@.*',
                                             probable_emails[0]).group(1)
        except Exception as e:
            probableNameFromEmail = probable_emails[0]
        probableNameFromEmail = re.sub(
            r'\s+', " ", re.sub(r'\W', " ", probableNameFromEmail))
    # NOTE(review): the flag is forced to True here, so the name branch below
    # always runs even when no name was found in any section.
    nameExtracted = True

    if nameExtracted:
        # NOTE(review): the local name "list" shadows the builtin for the
        # rest of the function.
        list = decideName(probableNameFromFileName, probableNameFromSection,
                          probableNameFromEmail)
        print list
        print "TIME TO EXTRACT NAME(sec):\t", tmeinextrname
        # First element is the given name; the rest form the family name.
        FirstName = list[0]
        LastName = ""
        if len(list) > 1:
            LastName = " ".join(list[1:])

        listHR.append(("GivenName", FirstName))
        if LastName:
            listHR.append(("FamilyName", LastName))

    if emailAddressExtracted:
        if probable_emails:
            listHR.append(("Email", ','.join(probable_emails)))

    if phoneNumberExtracted:
        if probable_mobiles:
            listHR.append(("Mobile", ','.join(probable_mobiles)))

    if dobExtracted:
        listHR.append(("DateOfBirth", probable_dob))

    if possibleYrsOfExp:
        listHR.append(("YrsOfExp", str(possibleYrsOfExp)))

    if possibleLocation:
        listHR.append(("Location", possibleLocation))

    if gender:
        listHR.append(("gender", str(gender)))

    if fathername:
        listHR.append(("FatherName", str(fathername)))

    if mothername:
        listHR.append(("MotherName", str(mothername)))

    if maritalstate:
        listHR.append(("MaritalState", str(maritalstate)))

    if nationality:
        listHR.append(("Nationality", str(nationality)))

    if languages:
        listHR.append(("Languages", str(languages)))

    if passportno:
        listHR.append(("PassportNo", str(passportno)))

    if licenseno:
        listHR.append(("LicenseNo", str(licenseno)))

    if panno:
        listHR.append(("PanNo", str(panno)))

    return listHR
Example #9
0
 def get_stock_price_now(self, symbol_id):
     """Return ``get_stock_price`` for ``symbol_id`` on the current day."""
     today_str = dateConv.to_str(self.current_day)
     return self.get_stock_price(symbol_id, today_str)
Example #10
0
 def get_currency_rate_now(self, symbol_id):
     """Return ``get_currency_rate`` for ``symbol_id`` on the current day."""
     today_str = dateConv.to_str(self.current_day)
     return self.get_currency_rate(symbol_id, today_str)
Example #11
0
 def next_day(self):
     """Replace ``current_day`` with what ``dateConv.next_day`` yields for it."""
     today_str = dateConv.to_str(self.current_day)
     self.current_day = dateConv.next_day(today_str)
Example #12
0
 def set_start_date(self, start_date_str):
     """Set ``current_day`` to the date parsed from ``start_date_str``."""
     parsed_start = dateConv.to_date(start_date_str)
     self.current_day = parsed_start
Example #13
0
import sys
from Engine.Engine import Engine
import Utils.DateConverter as dateConv

# Command line: <rules file> <start date> <end date>, dates as yyyy.MM.dd.
file_name = sys.argv[1]
start_date = sys.argv[2]
end_date = sys.argv[3]

# Reject malformed dates before any simulation work starts.
if not dateConv.is_date_str_valid_format(start_date):
    raise ValueError(
        'Date format for start date is not acceptable Sir. It should be yyyy.MM.dd'
    )
if not dateConv.is_date_str_valid_format(end_date):
    raise ValueError(
        'Date format for end date is not acceptable Sir. It should be yyyy.MM.dd'
    )
# NOTE(review): presumably dateConv.after(a, b) is true when b lies after a -
# confirm against Utils.DateConverter.
if not dateConv.after(start_date, end_date):
    raise ValueError(
        'Date of simulation start should be before date of simulation end, Sir'
    )

# Keep the rules file open for the whole run and close it afterwards, even if
# the engine raises; it was previously opened and never closed.
with open(file_name) as parsed_file:
    engine = Engine(start_date, end_date, parsed_file)
    engine.invest()