Example #1
0
 def __check_if_japanese(self, text, syllables):
     """Return the person names in ``text`` whose last and first names both pass ``__word_break``.

     Names come from ``self.__get_human_names(text)``. A name is kept only
     when ``self.__word_break`` is truthy for its lowercased last name and
     for its lowercased first name, checked against ``syllables``.
     """
     name_list = []
     for name in self.__get_human_names(text):
         # Parse once and reuse the result for both name parts.
         parsed = HumanName(name)
         if self.__word_break(parsed.last.lower(), syllables) and \
                 self.__word_break(parsed.first.lower(), syllables):
             name_list.append(name)
     return name_list
Example #2
0
    def __init__(self,
                 account,
                 name,
                 role,
                 start_date,
                 end_date,
                 hourly_rate,
                 utilization,
                 client,
                 office=None):
        """Store the given values on the new instance.

        ``name`` is lowercased and split with ``HumanName`` into first,
        middle and last name. ``converted_fulltime`` always starts as False.
        """
        self.role = role
        self.account = account

        # Name parts are stored lowercased because the input is lowercased
        # before parsing.
        name_parts = HumanName(name.lower())
        self.first_name, self.middle_name, self.last_name = (
            name_parts.first,
            name_parts.middle,
            name_parts.last,
        )

        # Who the work is for / where it is based.
        self.client = client
        self.office = office
        self.converted_fulltime = False

        # Engagement period and pricing.
        self.start_date = start_date
        self.end_date = end_date
        self.hourly_rate = hourly_rate
        self.utilization = utilization
Example #3
0
def impute_names_model(name):
    """Split ``name`` with ``HumanName`` and return its parts as a dict.

    The result has the keys ``given_name``, ``middle_names``,
    ``family_name`` and ``suffix``.
    """
    parsed = HumanName(name)
    # (output key, HumanName attribute) pairs, in output order.
    key_to_attribute = (
        ('given_name', 'first'),
        ('middle_names', 'middle'),
        ('family_name', 'last'),
        ('suffix', 'suffix'),
    )
    return {key: getattr(parsed, attribute) for key, attribute in key_to_attribute}
Example #4
0
def impute_names(name):
    """Split ``name`` with ``HumanName`` and return its parts as a dict.

    The result has the keys ``given``, ``middle``, ``family`` and ``suffix``.
    """
    parsed = HumanName(name)
    parts = {}
    parts['given'] = parsed.first
    parts['middle'] = parsed.middle
    parts['family'] = parsed.last
    parts['suffix'] = parsed.suffix
    return parts
Example #5
0
def get_human_names(text):
    """Return "First Last" strings for the PERSON entities spaCy finds in ``text``.

    Each entity's text has newlines removed and is parsed with ``HumanName``.
    Spaces inside the first and last name are stripped before the two are
    joined with a single space. A name that fails to parse is reported with
    ``print('error')`` and skipped.
    """
    # NOTE(review): the model is loaded on every call; consider loading it
    # once at module level if this function is called repeatedly.
    en = spacy.load('en')
    doc = en(text)

    person_list = [
        str(entity).replace('\n', '')
        for entity in doc.ents
        if entity.label_ == 'PERSON'
    ]

    result = []
    for name in person_list:
        try:
            # Parse once and reuse the result for both name parts.
            parsed = HumanName(name)
            first_last = (str(parsed.first).replace(' ', '') + ' ' +
                          str(parsed.last).replace(' ', ''))
        except Exception:
            # Best effort: report and skip this name, but no longer swallow
            # KeyboardInterrupt/SystemExit as the bare ``except`` did.
            print('error')
        else:
            result.append(first_last)

    return result
Example #6
0
    def findGossip(self, tweets, keywords, hashtag):
        '''
        From a collection of tweets, count how often each person is mentioned.

        The tweets are stored on ``self.tweets`` and de-duplicated with
        ``self.removeDuplicates``. ``self.findTargets`` supplies the names
        found in each tweet, and every name is normalised to "Last, First".
        ``self.findDuplicates`` groups the names, and ``self.hits`` ends up
        mapping each name to the size of its group.

        ``keywords`` and ``hashtag`` are accepted for interface compatibility
        but are not used: the team/keyword filtering they fed is disabled.
        '''
        self.tweets = tweets
        self.hits = {}
        self.logger.info("Fetching latest gossip from collection of tweets")

        # The disabled filtering used to split the hashtag on '#' here, which
        # raised IndexError for hashtags without '#' although the result was
        # never used.
        unique_tweets = self.removeDuplicates(self.tweets)

        players = []
        for match in unique_tweets:
            for name in self.findTargets(match):
                # Parse once and reuse the result for both name parts.
                parsed = HumanName(name)
                players.append(parsed.last + ', ' + parsed.first)

        players = self.findDuplicates(players)
        # ``items()`` works on both Python 2 and 3; ``iteritems()`` was 2-only.
        self.hits = {player: len(hit) for player, hit in players.items()}
        self.logger.info('Things gossiped about {}'.format(self.hits))
Example #7
0
def preProcess(names):
    """Add a ``fullName`` column and convert ``dob`` to datetimes, in place.

    ``fullName`` holds a ``HumanName`` built from each row's ``fn`` and
    ``ln`` joined by a space. Each ``dob`` entry ends in a two-digit year,
    which is expanded to four digits before the column is handed to
    ``pd.to_datetime``.
    """
    # Wrap every row's "<fn> <ln>" in a HumanName object.
    names['fullName'] = [
        HumanName(row.fn + ' ' + row.ln) for _, row in names.iterrows()
    ]

    # Expand the two-digit year: below 18 means 20xx, anything else 19xx
    # (assumes entries are not older than 100 years).
    expanded = []
    for entry in names.dob:
        two_digit_year = int(entry[-2:])
        century = 2000 if two_digit_year < 18 else 1900
        expanded.append(str(entry[:-2]) + str(century + two_digit_year))

    names.dob = expanded
    names.dob = pd.to_datetime(names.dob)
Example #8
0
    def __init__(self, account, name, email, office, role, salary, start_date):
        """Create a person with default permissions and a first job-history entry.

        ``name`` is lowercased and split with ``HumanName`` into first,
        middle and last name; ``email`` is stored lowercased. All
        administrator and permission flags start as False and the person
        starts active and 100% billable. The department is taken from
        ``role``. A ``JobHistoryEntry`` for the given role and salary is
        appended to ``self.job_history``.
        """
        self.account = account

        parsed_name = HumanName(name.lower())
        self.first_name = parsed_name.first
        self.middle_name = parsed_name.middle
        self.last_name = parsed_name.last

        self.email = email.lower()

        # Every new person starts without elevated rights.
        self.is_administrator = False
        self.is_hr_administrator = False
        self.employee_assignment_access = False
        self.permissions_global_pipeline = False
        self.permissions_global_financials = False
        self.permissions_global_utilization = False

        self.office = office
        self.role = role
        self.salary = salary

        self.is_active = True

        self.created_at = db_utc_now()
        # The original set start_date to "now" first and then always
        # overwrote it with the argument; only the final value is kept.
        self.start_date = start_date
        self.end_date = None
        self.percent_billable = 100

        # The original if/else stored role.department in both cases
        # (None when the role has no department).
        self.department = role.department

        self.employee_number = 0

        jh = JobHistoryEntry(self, role, salary)
        self.job_history.append(jh)
Example #9
0
def get_human_names(text):
    """Return the first names of the multi-word PERSON entities NLTK finds in ``text``.

    The text is tokenized, POS-tagged and chunked with ``nltk.ne_chunk``.
    Each PERSON subtree with more than one token becomes a space-joined full
    name; duplicates are dropped while keeping first-seen order. The list of
    full names is printed, then each is parsed with ``HumanName`` and its
    first name returned.
    """
    tagged_tokens = nltk.pos_tag(nltk.tokenize.word_tokenize(text))
    chunk_tree = nltk.ne_chunk(tagged_tokens, binary=False)

    names = []
    for subtree in chunk_tree.subtrees(filter=lambda t: t.label() == 'PERSON'):
        words = [leaf[0] for leaf in subtree.leaves()]
        if len(words) <= 1:  # ignore surnames only
            continue
        full_name = ' '.join(words)
        if full_name not in names:
            names.append(full_name)

    print(names)
    return [HumanName(full_name).first for full_name in names]
Example #10
0
def person_edit(request):
    """Show or process the edit form for one person (a ``User`` row).

    The session must carry ``aid`` and ``uid``, and the logged-in user must
    be an administrator or HR administrator; otherwise the request is
    redirected to the application root.

    On POST the submitted fields are copied onto the person and the
    per-office, per-client and per-department permission lists are rebuilt.
    The latest ``Salary`` row is updated in place, or a new one is added for
    a raise / allocation change made on a different day. The browser is then
    redirected to the page selected by ``source``. If the submitted email
    already belongs to a different user, nothing is changed and the form is
    rendered again.

    For any other request a dict of template values is returned. Any
    exception is printed and answered with a redirect to the application
    root.
    """
    try:
        account_id = long(request.session['aid'])
        user_id = long(request.session['uid'])
        user = DBSession.query(User).filter_by(id=user_id).first()
        account = DBSession.query(Account).filter_by(id=account_id).first()

        # Only administrators and HR administrators may edit people.
        if user is None or account is None or (user.is_administrator == False and user.is_hr_administrator == False):
            return HTTPFound(request.application_url)

        person_id = long(request.matchdict['person_id'])
        # NOTE(review): unlike the office/role/client lookups below, this
        # query is not restricted to account_id -- confirm that is intended.
        person = DBSession.query(User).filter_by(id=person_id).first()

        # Default for every HTTP method, so rendering below never hits an
        # unbound ``source`` (previously only GET and POST set it).
        source = request.params.get("source", 'administration')

        if request.method == "POST":
            name = request.params["name"].lower()

            employee_number = request.POST.get('employee_number')
            source = request.params["source"]
            email = request.params["email"].lower()
            salary = long(request.params["salary"])

            is_raise = request.POST.get('raise')
            change_allocation = request.POST.get('change_allocation')

            office_id = request.POST.get('office_id')
            if office_id == '':
                office = None
            else:
                office = DBSession.query(Office).filter_by(id=long(office_id)).filter_by(
                    account_id=account_id).first()

            role_id = long(request.params["role_id"])
            role = DBSession.query(Role).filter_by(id=role_id).filter_by(account_id=account_id).first()
            percent_billable = long(request.params["percent_billable"])

            # An empty start date means "starting now".
            start_date_text = request.POST.get('start_date')
            if start_date_text == '':
                start_date = datetime.datetime.now()
            else:
                start_date = _person_edit_parse_date(start_date_text)

            # An empty end date means the person has no end date.
            end_date_text = request.POST.get('end_date')
            if end_date_text == '':
                end_date = None
            else:
                end_date = _person_edit_parse_date(end_date_text)

            currency_id = request.POST.get('currency_id')
            if currency_id != '':
                currency = DBSession.query(Currency).filter_by(id=long(currency_id)).first()
            else:
                currency = None

            # These flags arrive as "1"/"0"; a missing field counts as "0".
            is_administrator = long(request.POST.get('is_administrator', '0')) == 1
            is_hr_administrator = long(request.POST.get('is_hr_administrator', '0')) == 1
            employee_assignment_access = long(request.POST.get('employee_assignment_access', '0')) == 1

            # The email must be unused or already belong to this person.
            u = DBSession.query(User).filter_by(email=email).first()
            if u is None or u.id == person_id:
                # The special value "all" grants the permission globally
                # instead of per office.
                office_financials_ids = request.params.getall("permissions_office_financials")
                office_utilization_ids = request.params.getall("permissions_office_utilization")
                office_pipeline_ids = request.params.getall("permissions_office_pipeline")
                permissions_global_financials = "all" in office_financials_ids
                permissions_global_utilization = "all" in office_utilization_ids
                permissions_global_pipeline = "all" in office_pipeline_ids

                parsed_name = HumanName(name)
                person.first_name = parsed_name.first
                person.middle_name = parsed_name.middle
                person.last_name = parsed_name.last
                person.employee_number = employee_number
                person.email = email
                person.salary = salary
                person.office = office
                person.role = role
                person.percent_billable = percent_billable
                person.start_date = start_date
                person.end_date = end_date
                person.currency = currency
                person.is_administrator = is_administrator
                person.is_hr_administrator = is_hr_administrator
                person.employee_assignment_access = employee_assignment_access
                person.permissions_global_financials = permissions_global_financials
                person.permissions_global_utilization = permissions_global_utilization
                person.permissions_global_pipeline = permissions_global_pipeline

                # Per-office lists stay empty when the permission is global.
                person.permissions_office_financials = (
                    [] if permissions_global_financials
                    else _person_edit_load_rows(Office, account_id, office_financials_ids))
                person.permissions_office_utilization = (
                    [] if permissions_global_utilization
                    else _person_edit_load_rows(Office, account_id, office_utilization_ids))
                person.permissions_office_pipeline = (
                    [] if permissions_global_pipeline
                    else _person_edit_load_rows(Office, account_id, office_pipeline_ids))

                person.permissions_client_financials = _person_edit_load_rows(
                    Client, account_id, request.params.getall("permissions_client_financials"))
                person.permissions_client_utilization = _person_edit_load_rows(
                    Client, account_id, request.params.getall("permissions_client_utilization"))
                person.permissions_client_pipeline = _person_edit_load_rows(
                    Client, account_id, request.params.getall("permissions_client_pipeline"))

                # Fixed: departments are looked up by their own ids. The
                # original filtered on a leftover client id here.
                person.permissions_department_financials = _person_edit_load_rows(
                    Department, account_id, request.params.getall("permissions_department_financials"))
                # Fixed: read the department utilization field. The original
                # read and iterated the client utilization ids.
                person.permissions_department_utilization = _person_edit_load_rows(
                    Department, account_id, request.params.getall("permissions_department_utilization"))

                s = DBSession.query(Salary).filter_by(user_id=person.id).order_by(Salary.start_date.desc()).first()

                now = datetime.datetime.now()
                wants_new_row = is_raise is not None or change_allocation is not None
                # Add a new row when the person has none yet (this used to
                # crash into the catch-all handler), or for a raise /
                # allocation change not made on the latest row's start day.
                if s is None or (wants_new_row and now.date() != s.start_date.date()):
                    s = Salary(person, salary, role.id, now, percent_billable)
                    DBSession.add(s)
                else:
                    s.salary = salary
                    s.role_id = int(role.id)
                    s.percent_billable = percent_billable

                DBSession.flush()

                if source == "reviews":
                    return HTTPFound(request.application_url + "/people/all/all/all")
                else:
                    return HTTPFound(request.application_url + "/administration/employees")

        departments = DBSession.query(Department).filter_by(account_id=account_id).all()
        offices = DBSession.query(Office).filter_by(account_id=account_id).all()
        clients = DBSession.query(Client).filter_by(account_id=account_id).all()

        roles = DBSession.query(Role).filter_by(account_id=account_id).all()
        currencies = DBSession.query(Currency).filter_by(account_id=account_id).all()

        return dict(logged_in=authenticated_userid(request), header=Header(source), departments=departments,
                    offices=offices, clients=clients, roles=roles, user=user, person=person, currencies=currencies)
    except Exception:
        # Deliberate catch-all: any failure is logged and sends the user back
        # to the application root.
        traceback.print_exc()
        return HTTPFound(request.application_url)


def _person_edit_parse_date(text):
    """Convert a ``month/day/year`` string into a ``datetime.date``."""
    parts = text.split("/")
    return datetime.date(long(parts[2]), long(parts[0]), long(parts[1]))


def _person_edit_load_rows(model, account_id, ids):
    """Return the ``model`` rows of the account matching ``ids``, skipping ids that match nothing."""
    rows = []
    for row_id in ids:
        row = DBSession.query(model).filter_by(account_id=account_id).filter_by(id=row_id).first()
        if row is not None:
            rows.append(row)
    return rows
            print(url)
            #print()
            print(e)

            From_KnownNames.clear()
            TEXT.clear()
            counter = counter + 1
            topStoriesDict[key] = current
        try:
            ####################################### Beautiful Soup ######################################################
            theStorySoFar = urlToString(url)
            ####################################### NLTK ##########################################
            names = get_human_names(theStorySoFar)
            nameListNLTK = []
            for name in names:
                firname = str(HumanName(name).first)
                laname = str(HumanName(name).last)
                try:
                    alphabetize(firname)
                    alphabetize(laname)
                except Exception as e:
                    blankvar = 0
                    #print()
                    #print("Either the first or last name is missing, so alphabetize failed. No big deal")
                if name not in current.author:
                    nameListNLTK.append(name)

            thisArticlesSourcesDict["HTML_BS"]["NLTK"] = nameListNLTK
            #print("NLTKS are: ", nameListNLTK)
            current.sources = thisArticlesSourcesDict
Example #12
0
def last_and_first_name(name_list1, name_list2):
    """Split two lists of full names into first/last names and dump them to ``output.csv``.

    Every name is parsed with ``HumanName``. The first and last names of
    both lists (list 1 first, then list 2) are written to ``output.csv`` in
    the current directory under the header ``first_name,last_name``.

    Returns:
        A 4-tuple ``(firsts_of_list1, lasts_of_list1, firsts_of_list2,
        lasts_of_list2)``.
    """
    name_list1f, name_list1l = _split_name_list(name_list1)
    name_list2f, name_list2l = _split_name_list(name_list2)

    name_listf = name_list1f + name_list2f
    name_listl = name_list1l + name_list2l
    with open('output.csv', 'w') as csvfile:
        fieldnames = ['first_name', 'last_name']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()
        for first, last in zip(name_listf, name_listl):
            writer.writerow({'first_name': first, 'last_name': last})

    return (name_list1f, name_list1l, name_list2f, name_list2l)


def _split_name_list(names):
    """Return ``(first_names, last_names)`` for ``names``, in input order."""
    firsts = []
    lasts = []
    for name in names:
        first, last = _first_and_last(name)
        firsts.append(first)
        lasts.append(last)
    return firsts, lasts


def _first_and_last(name):
    """Return ``(first, last)`` for one full name, parsing it only once."""
    parsed = HumanName(name)
    suffix = parsed.suffix
    # Extra logic for names whose first name ended up in the suffix: when the
    # suffix contains a '.' and a comma-separated second piece, the first
    # word of that piece is the first name and the parsed "first" is
    # actually the last name.
    if "." in suffix:
        pieces = suffix.split(',')
        if len(pieces) > 1:
            words = pieces[1].split()
            if words:
                return words[0], parsed.first
    # A plain suffix such as "Jr." has no second piece. The original raised
    # IndexError for it; fall back to the regular parse instead.
    return parsed.first, parsed.last
Example #13
0
    def process(self, statement):
        """Build a reply to a schedule question.

        The statement is lowercased and tokenized. Tokens naming an event
        kind (``fields``) and tokens naming a time (``temporals``, or
        anything ``self.is_date`` accepts) are collected. First names of
        people found by ``self.get_human_names`` are added as extra search
        terms.

        Returns:
            A ``Statement`` whose ``confidence`` is 1 when an answer was
            built and 0 when the question was not understood.
        """
        statement = str(statement).lower()
        fields = [
            'classes', 'class', 'deliverables', 'practicals', 'theoreticals',
            'defense', 'evaluation', 'exame', 'aula'
        ]
        temporals = ['today', 'tomorrow', 'next']
        field = []
        temporal = []
        statem = ''

        statetokens = [token.lower() for token in word_tokenize(statement)]
        names = self.get_human_names(statement)
        classes = self.get_classes_names(statement)

        # Collect the event kinds and time expressions mentioned.
        for token in statetokens:
            if token in fields:
                # Every spelling of "class" maps to the Portuguese 'Aula'.
                if token in ('classes', 'class', 'aula'):
                    field.append('Aula')
                else:
                    field.append(token)
            if token in temporals:
                temporal.append(token)
            if self.is_date(token):
                temporal.append(token)

        print(field)
        print(temporal)

        if not field or not temporal:
            response = Statement('Sorry didnt understand')
            response.confidence = 0
            return response

        for name in names:
            field.append(HumanName(name).first)

        INFORMATIONGIVEN = 1

        # ---- Multiple inputs: more than one event kind or time given ----
        if len(field) > 1 or len(temporal) > 1:
            if 'or' in statetokens or 'and' in statetokens:
                # List the results of every (kind, time) combination.
                for fieldsend in field:
                    for temporalsend in temporal:
                        statem = statem + self.make_statement_from_select(
                            self.get_specific_classes(fieldsend, temporalsend),
                            INFORMATIONGIVEN)
                statem = 'You have:' + statem
                response = Statement(statem)
                response.confidence = 1
                return response

            if 'with' in statetokens:
                saved = []
                rets = []
                is_first_lookup = True
                for fieldsend in field:
                    for temporalsend in temporal:
                        found = self.get_specific_classes(
                            fieldsend, temporalsend)
                        # NOTE(review): rets is reset on every lookup, so the
                        # answer only holds the last lookup's overlap with
                        # the first one. Kept as in the original -- confirm
                        # whether the overlap should accumulate.
                        rets = []
                        if is_first_lookup:
                            saved = found
                            is_first_lookup = False
                        else:
                            rets = [term for term in found if term in saved]
                statem = 'You have:' + self.make_statement_from_select(
                    rets, INFORMATIONGIVEN)
                response = Statement(statem)
                response.confidence = 1
                return response

            if len(classes) > 0:
                # NOTE(review): the original returned from inside its loop
                # over ``temporal``, so only the first time expression is
                # ever answered. Kept as-is -- confirm this is intended.
                temporalsend = temporal[0]
                for class_name in classes:
                    fieldsend = ''.join(class_name)
                    fetchedevents = self.get_named_classes(
                        fieldsend, temporalsend)
                    statem = statem + self.statement_parsing(fetchedevents)
                response = Statement(statem)
                response.confidence = 1
                return response

            response = Statement(
                'Im not understanding your question very much, please reformulate it'
            )
            response.confidence = 0
            return response

        # ---- Single input: exactly one event kind and one time ----
        if 'Aula' in field:
            if 'next' in temporal:
                statem = self.make_statement_from_select(
                    self.get_next_classes(), INFORMATIONGIVEN)
                statem = 'Next, you have:' + statem
                response = Statement(statem)
                response.confidence = 1
                return response

            # NOTE(review): 'today'/'tomorrow' also reach this parse() call
            # -- confirm the parser in use accepts them.
            datefind = parse(''.join(temporal))
            fetchedevents = self.get_specifictime_classes(datefind)
            statem = self.statement_parsing(fetchedevents)
            response = Statement(statem)
            response.confidence = 1
            return response

        # These kinds are all answered the same way. The names must match
        # the entries of ``fields`` exactly: the original tested 'practical'
        # and 'theoretical', which can never appear in ``field``, so those
        # two kinds always fell through to the "don't understand" reply.
        simple_kinds = ('deliverables', 'practicals', 'theoreticals',
                        'defense', 'evaluation', 'exame')
        if any(kind in field for kind in simple_kinds):
            dateformated = ''.join(temporal)
            # NOTE(review): the whole ``field`` list is passed here, while
            # the multi-input branch passes a single string -- confirm what
            # get_specific_classes expects.
            fetchedevents = self.get_specific_classes(field, dateformated)
            statem = statem + self.statement_parsing(fetchedevents)
            response = Statement(statem)
            response.confidence = 1
            return response

        response = Statement(
            'Im dont understand very much, please reformulate the question friend'
        )
        response.confidence = 0
        return response
                TEXT.append(arrowsplitend[0])
            fulltext = ' '.join(TEXT)
            #print(counter,",",fulltext)
            #print("Something Happened...is Good.")
            text = fulltext
            names = get_human_names(text)
            nameCountiterate = 0
            #####
            ##            print("Names being added from", key, " are: ")
            ##            for name in names:
            ##                print(name)
            #####

            for name in names:
                sourceNumber = ("source" + str(nameCountiterate))
                firname = str(HumanName(name).first)
                laname = str(HumanName(name).last)
                try:
                    alphabetize(firname)
                    alphabetize(laname)
                except Exception as e:
                    blankvar = 0
                    #print()
                    #print("Either the first or last name is missing, so alphabetize failed. No big deal")
                    #print(e)
                thisArticlesSourcesDict["NLTK"][name] = {
                    "sourceNumber": sourceNumber,
                    "firstName": HumanName(name).first,
                    "lastName": HumanName(name).last
                }
                current.sources = thisArticlesSourcesDict
Example #15
0
 def get_name(self):
     """Return ``self.full_name`` as a ``HumanName`` after calling its ``capitalize()``."""
     parsed = HumanName(self.full_name)
     parsed.capitalize()
     return parsed
Example #16
0
def _parse_mdy_date(text):
    """Turn a ``month/day/year`` string into a ``datetime.date``.

    Malformed input (too few ``/``-separated parts, non-numeric parts, or an
    impossible calendar date) raises; the caller's handler deals with it.
    """
    parts = text.split("/")
    return datetime.date(long(parts[2]), long(parts[0]), long(parts[1]))


def freelancer_edit(request):
    """Show the edit form for a freelancer, or apply a submitted edit.

    The freelancer is looked up by ``request.matchdict['freelancer_id']``
    within the account stored in the session (``aid``); the acting user comes
    from the session ``uid``.

    On ``POST`` the submitted name, dates, rate, utilization, client and
    office are written to the freelancer and the response is a redirect to
    the utilization page of the chosen client (preferred) or office for the
    current year.

    Otherwise a dict is returned with the clients, offices and roles the
    user may choose from, together with the freelancer, user and account.

    Every failure path -- missing user/account/freelancer, neither a client
    nor an office resolved, missing role, insufficient permissions, no
    accessible clients or offices, or any exception -- redirects to
    ``request.application_url``.
    """
    try:
        freelancer_id = long(request.matchdict['freelancer_id'])
        account_id = long(request.session['aid'])
        user_id = long(request.session['uid'])

        user = DBSession.query(User).filter_by(id=user_id).first()
        account = DBSession.query(Account).filter_by(id=account_id).first()
        if user is None or account is None:
            return HTTPFound(request.application_url)

        freelancer = DBSession.query(Freelancer).filter_by(
            account_id=account_id).filter_by(id=freelancer_id).first()
        if freelancer is None:
            return HTTPFound(request.application_url)

        if request.method == "POST":
            name = request.params["name"].lower()
            role_id = long(request.params["role_id"])
            client_id = request.params.get('client_id')
            office_id = request.params.get('office_id')

            # A missing or empty id means "not selected"; only look up the
            # record when a non-empty value was submitted.
            client = None
            if client_id:
                client = DBSession.query(Client).filter_by(
                    account_id=account_id).filter_by(id=long(client_id)).first()

            office = None
            if office_id:
                office = DBSession.query(Office).filter_by(
                    account_id=account_id).filter_by(id=long(office_id)).first()

            # The freelancer must be attached to at least one of the two.
            if client is None and office is None:
                return HTTPFound(request.application_url)

            utilization = long(request.params["utilization"])
            hourly_rate_local = long(request.params["hourly_rate"])
            if user.currency is None:
                hourly_rate = hourly_rate_local
            else:
                # The stored rate is the submitted rate multiplied by the
                # user's currency_to_usd factor.
                hourly_rate = hourly_rate_local * user.currency.currency_to_usd

            start_date = _parse_mdy_date(request.params["start_date"])
            end_date = _parse_mdy_date(request.params["end_date"])

            role = DBSession.query(Role).filter_by(id=role_id).filter_by(
                account_id=account_id).first()

            # The explicit ``== False`` comparisons are kept on purpose: a
            # ``None`` result from the permission check is not a denial here.
            office_denied = (
                office is not None
                and user.can_access_office(office, "utilization") == False
            )
            if role is None or office_denied:
                return HTTPFound(request.application_url)

            if client is not None and user.can_access_client(client, "utilization") == False:
                return HTTPFound(request.application_url)

            parsed_name = HumanName(name)
            freelancer.first_name = parsed_name.first
            freelancer.middle_name = parsed_name.middle
            freelancer.last_name = parsed_name.last
            freelancer.office = office
            freelancer.client = client
            freelancer.start_date = start_date
            freelancer.end_date = end_date
            freelancer.utilization = utilization
            freelancer.hourly_rate = hourly_rate
            DBSession.flush()

            current_year = str(datetime.datetime.now().year)
            if client is not None:
                return HTTPFound(request.application_url + "/client/" + str(client_id)
                                 + "/utilization/" + current_year)

            if office is not None:
                return HTTPFound(request.application_url + "/office/" + str(office_id)
                                 + "/utilization/" + current_year)

        # Form rendering: restrict the selectable clients/offices to what the
        # user may access, unless the user has account-wide access.
        has_global_access = user.is_administrator or user.permissions_global_utilization

        clients = DBSession.query(Client).filter_by(account_id=account_id).all()
        if not has_global_access:
            clients = [c for c in clients if user.can_access_client(c, "utilization")]
            if not clients:
                print("************* no c")
                return HTTPFound(request.application_url)

        offices = DBSession.query(Office).filter_by(account_id=account_id).all()
        if not has_global_access:
            offices = [o for o in offices if user.can_access_office(o, "utilization")]
            if not offices:
                print("************* no o")
                return HTTPFound(request.application_url)

        roles = DBSession.query(Role).filter_by(account_id=account_id).all()

        return dict(logged_in=authenticated_userid(request), header=Header('financials'), clients=clients,
                    offices=offices, roles=roles, freelancer=freelancer, user=user, account=account)
    except Exception:
        # Best-effort view: log the failure and fall back to the landing page.
        # ``Exception`` (not a bare ``except``) lets SystemExit and
        # KeyboardInterrupt propagate.
        print("*************")
        traceback.print_exc()
        return HTTPFound(request.application_url)
        def namethatname(self, results, letter, text, variable, filename):
            """Write the person names mentioned more than once to ``results``.

            Args:
                results: object with a ``write(str)`` method; receives a header
                    and one line per reported name.
                letter: drive letter used to locate
                    ``<letter>:/Comment-Graph Report/User Tools/Not Names.txt``,
                    a file with one excluded word per line.
                text: iterable of strings to scan.
                variable: selects which entries of ``text`` are scanned:
                    1 = entries containing "NEGATIVE", 2 = "POSITIVE",
                    3 = "NEUTRAL" or "MIXED", 4 = every entry (matching is
                    case-insensitive).
                filename: unused; kept for interface compatibility.

            Returns:
                The list of distinct, stripped candidate names that were
                examined (whether or not they were frequent enough to be
                written). An empty list when ``variable`` is not 1-4.
            """
            results.write("\n----------------------------------------------")
            results.write("\nMost common employee names:\n ")

            def count_mentions(entries, word):
                """Count the entries that contain ``word``, ignoring case."""
                needle = word.upper()
                return sum(1 for entry in entries if needle in entry.upper())

            def extract_names(joined):
                """Collect candidate person names from ``joined``.

                Two sources are combined: tokens following a token that
                contains ``*`` (capitalized first name, optionally followed by
                a capitalized last name), and NLTK ``PERSON`` chunks.
                """
                tokens = nltk.tokenize.word_tokenize(joined)
                tagged = nltk.pos_tag(tokens)
                tree = nltk.ne_chunk(tagged, binary=False)

                starred = []
                last_index = len(tokens) - 1
                # enumerate keeps the index in step with the token even when
                # an iteration is skipped early.
                for index, token in enumerate(tokens):
                    if '*' not in token or index + 1 > last_index:
                        continue
                    firstname = tokens[index + 1]
                    if not firstname[0].isupper():
                        continue
                    if firstname not in starred:
                        starred.append(firstname)
                    if index + 2 <= last_index:
                        lastname = tokens[index + 2]
                        if lastname[0].isupper():
                            full_name = firstname + ' ' + lastname
                            if full_name not in starred:
                                starred.append(full_name)

                person_list = []
                for subtree in tree.subtrees(filter=lambda t: t.label() == 'PERSON'):
                    person = []
                    for leaf in subtree.leaves():
                        person.append(leaf[0])
                        # Every leading part of the chunk is recorded
                        # ("John", then "John Smith").
                        candidate = ' '.join(person)
                        if candidate not in person_list:
                            person_list.append(candidate)

                for candidate in starred:
                    candidate = candidate.strip()
                    if candidate and candidate not in person_list:
                        person_list.append(candidate)
                return person_list

            # Keywords an entry must contain for each mode; None = keep all.
            keywords_by_mode = {
                1: ("NEGATIVE",),
                2: ("POSITIVE",),
                3: ("NEUTRAL", "MIXED"),
                4: None,
            }
            if variable not in keywords_by_mode:
                return []

            keywords = keywords_by_mode[variable]
            if keywords is None:
                tame = list(text)
            else:
                tame = [
                    entry for entry in text
                    if any(keyword in entry.upper() for keyword in keywords)
                ]

            names = extract_names(' '.join(tame))

            not_names_path = str(letter) + ":/Comment-Graph Report/User Tools/Not Names.txt"
            with open(not_names_path) as handle:
                NotNames = set(handle.read().splitlines())

            countdown = []
            for n in names:
                parsed = HumanName(n)
                first = parsed.first
                last = parsed.last
                first_ok = first.upper() not in NotNames
                last_ok = last.upper() not in NotNames
                if first_ok and last_ok:
                    candidate = first + " " + last
                elif first_ok:
                    candidate = first
                elif last_ok:
                    candidate = last
                else:
                    # Both parts are excluded words: nothing to report.
                    continue
                # Strip before the duplicate check so "John " and "John"
                # count as the same name.
                candidate = str(candidate).strip()
                if candidate and candidate not in countdown:
                    countdown.append(candidate)

            for c in countdown:
                num = count_mentions(tame, c)
                if num > 1:
                    results.write('\n' + str(c) + ' appears this many times: ' + str(num))
            return countdown
Example #18
0
from nameparser.parser import HumanName

def humannames(text):
    """Return the distinct multi-word PERSON entities that NLTK finds in ``text``.

    The text is tokenized, POS-tagged and run through ``nltk.ne_chunk``.
    Each ``PERSON`` chunk made of more than one token is joined with single
    spaces; names are returned in order of first appearance, without
    duplicates. Single-token chunks are ignored.
    """
    tagged = nltk.pos_tag(nltk.tokenize.word_tokenize(text))
    chunked = nltk.ne_chunk(tagged, binary=False)

    found = []
    for chunk in chunked.subtrees(filter=lambda node: node.label() == 'PERSON'):
        words = [leaf[0] for leaf in chunk.leaves()]
        if len(words) <= 1:
            continue
        full_name = ' '.join(words)
        if full_name not in found:
            found.append(full_name)
    return found

# Load the press releases as one text blob (a trailing newline is appended).
with open("pressReleases.csv") as f:
    text = f.read() + '\n'

# Print each detected name, followed by its "last, first" form.
names = humannames(text)
for name in names:
    print(name)
    parsed = HumanName(name)
    last_first = parsed.last + ', ' + parsed.first
    print(last_first)
Example #19
0
def preProcess(names):
    """Build a pairwise comparison table from a table of person records.

    ``names`` is read through its ``ln``, ``dob``, ``gn`` and ``fn`` columns
    and its ``dob`` column is rewritten in place (two-digit year expanded,
    then converted with ``pd.to_datetime``).

    Every unordered pair of rows becomes one row of the returned DataFrame.
    Besides the paired source values it carries the parsed names (``ln1``,
    ``ln2``), 0/1 flags ``sameDob``, ``sameGender``, ``sameFather``,
    ``sameFname``, ``sameLname``, ``sameLnameInitial``, the first-name edit
    distance ``distFname`` and the parsed-name lengths ``ln1_len`` /
    ``ln2_len``.
    """
    suffixes = ["JR", "SR", "I", "II", "III", "IV"]

    # Expand the trailing two characters of each date into a four-digit year.
    expanded_dobs = []
    for raw_dob in names.dob:
        year = 1900 + int(raw_dob[-2:])
        # NOTE(review): with a two-digit year this sum is at most 1999, so the
        # branch below looks unreachable -- confirm the intended cut-off.
        if year > 2018:
            year -= 100
        expanded_dobs.append(str(raw_dob[:-2]) + str(year))
    names.dob = expanded_dobs
    names.dob = pd.to_datetime(names.dob)

    # One record per unordered pair (a, b) with a < b.
    total = len(names)
    pairs = []
    for a in range(total):
        for b in range(a + 1, total):
            pairs.append({
                'ln1': names.ln[a],
                'dob1': names.dob[a],
                'gn1': names.gn[a],
                'fn1': names.fn[a],
                'ln2': names.ln[b],
                'dob2': names.dob[b],
                'gn2': names.gn[b],
                'fn2': names.fn[b],
            })
    namesCross = pd.DataFrame(pairs)

    parsed_left = []
    parsed_right = []
    same_gender = []
    same_father = []
    same_dob = []
    same_first = []
    first_distance = []
    same_last = []
    same_last_initial = []
    left_lengths = []
    right_lengths = []

    for row in range(len(namesCross)):
        name1 = HumanName(namesCross['ln1'][row].upper())
        name2 = HumanName(namesCross['ln2'][row].upper())

        # Simple equality flags.
        same_gender.append(
            1 if namesCross['gn1'][row] == namesCross['gn2'][row] else 0)
        same_father.append(
            1 if checkFname(namesCross['fn1'][row], namesCross['fn2'][row]) else 0)
        same_dob.append(
            1 if namesCross['dob1'][row] == namesCross['dob2'][row] else 0)
        same_first.append(1 if name1.first == name2.first else 0)

        # Edit distance between the two first names.
        first_distance.append(editdistance.eval(name1.first, name2.first))

        # A "last name" that is really a suffix is moved to the suffix field.
        for parsed in (name1, name2):
            if parsed.last.upper() in suffixes:
                parsed.suffix = parsed.last
                parsed.last = ''

        # A missing last name on either side counts as a match.
        last_matches = (name1.last == name2.last
                        or name1.last == '' or name2.last == '')
        same_last.append(1 if last_matches else 0)

        # One side may carry only the initial of the other side's last name.
        both_present = name1.last != '' and name2.last != ''
        initial_matches = both_present and (
            name1.last == name2.last[0] or name1.last[0] == name2.last)
        same_last_initial.append(1 if initial_matches else 0)

        left_lengths.append(len(name1))
        right_lengths.append(len(name2))
        parsed_left.append(name1)
        parsed_right.append(name2)

    namesCross['ln1'] = parsed_left
    namesCross['ln2'] = parsed_right
    namesCross['sameDob'] = same_dob
    namesCross['sameGender'] = same_gender
    namesCross['sameFather'] = same_father
    namesCross['sameFname'] = same_first
    namesCross['distFname'] = first_distance
    namesCross['sameLname'] = same_last
    namesCross['sameLnameInitial'] = same_last_initial
    namesCross['ln1_len'] = left_lengths
    namesCross['ln2_len'] = right_lengths
    return namesCross
Example #20
0
                    if not checker and i is not [
                            '@', ':', ';', ',', '.', '?', '/', '-'
                    ]:
                        check_porce = pos[k - 2]
                        check_up_porce = pos[k - 1]
                        # print(check_porce)
                        # print(check_up_porce)
                        # print("----break----")
    return (person_list)


#
names = get_human_names(pdf_data)
# Build a "last, first, middle" string for each detected name.
for name in names:
    parsed = HumanName(name)
    last_first = ', '.join([parsed.last, parsed.first, parsed.middle])

import re
import spacy
from nltk.corpus import stopwords

# load pre-trained model
nlp = spacy.load('en_core_web_sm')

# Grab all general stop words
STOPWORDS = set(stopwords.words('english'))

# Education Degrees
EDUCATION = [
Example #21
0
def parse_name(name):
    """Wrap ``name`` in a ``HumanName`` and return the resulting object."""
    return HumanName(name)