Example #1
def copy_resources_from_data_folder_only_if_ready(p_root_path_data, p_config):
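    # Walk p_root_path_data; once a folder marked with OK.txt is found, subdivide the image
    # files in the folders beneath it and group the results per output folder for learning.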
    is_ok_found = False
    root_found = ''
    folder_list = {}
    if not os.path.exists(p_root_path_data):
        print("error, the folder doesn't exist :", p_root_path_data)
        exit(-1)
    else:
        for root, dirs, files in os.walk(p_root_path_data):
            if is_ok_found and root_found in root:
                if (b_any("distance_map" in x for x in files)
                        or b_any("_image" in x for x in files)
                        or b_any("object_index" in x for x in files)):
                    print("Currently : {}".format(root))
                    result_folder, array_path_images = subdivide_image(
                        root, files, p_config)
                    if result_folder not in folder_list:
                        folder_list[result_folder] = []
                    folder_list[result_folder].append(array_path_images)

            else:
                if "OK.txt" in str(files):
                    # The file is found it means that we can process images
                    is_ok_found = True
                    root_found = root
                else:
                    is_ok_found = False
        prepare_data_for_learning(folder_list, p_config)
Example #2
def eliminate_hidden_twins(hidden_twins_list, values):
    """Eliminate values using the naked twins strategy.
    Args:
        naked_twins (list): a list of lists containing each pair of hidden/naked twins. Example [[A1, B1], [D4, E5], [H4, C4]]
        values(dict): a dictionary of the form {'box_name': '123456789', ...}
    
    Returns:
        values(dict): the values dictionary with the naked twins eliminated from peers.
    """

    try:

        logger.info("hidden_twins() INITIAL VALUES : " + str(values) + "\n")

        for i in range(len(hidden_twins_list)):

            box1 = hidden_twins_list[i][
                0]  # From the previous example : box1 = hidden_twins_list[0][0] --> A1
            box2 = hidden_twins_list[i][
                1]  # From the previous example : box2 = hidden_twins_list[0][1] --> B1

            logger.info("box1 = " + box1 + "\n")
            logger.info("box2 = " + box2 + "\n")

            # Build a set with the common peers, the intersection of both peers
            common_peers = set(peers[box1]) & set(peers[box2])
            logger.info("hidden_twins(): common_peers = " + str(common_peers) +
                        "\n")

            hidden_pair = set(values[box1]) & set(values[box2])
            hidden_pair = list(hidden_pair)
            logger.info("hidden_twins(): hidden_pair = " + str(hidden_pair) +
                        "\n")

            common_peers = [values[k] for k in common_peers]

            if not (b_any(hidden_pair[0] in x for x in common_peers)
                    or b_any(hidden_pair[1] in x for x in common_peers)):
                # logger.info("hidden_twins(): hidden_pair FOUND!!! = " + str(hidden_pair) + "\n")
                # logger.info("hidden_twins(): hidden_pair FOUND!!! not in common_peers = " + str(common_peers) + "\n")

                assign_value(values, box1, hidden_pair[0] + hidden_pair[1])
                assign_value(values, box2, hidden_pair[0] + hidden_pair[1])

                logger.info("hidden_twins(): assigned = " + hidden_pair[0] +
                            hidden_pair[1] + " in " + box1 + " and " + box2 +
                            "\n")

                logger.info("hidden_twins() UPDATED VALUES : " + str(values) +
                            "\n")

        return values

    except Exception as err:

        logger.error(
            "eliminate_hidden_twins(): Fatal error eliminating hidden twins: " + str(err) + "\n")
Example #3
def expressao_interrogativa(frase, traducao_glosas, tags):
    """
	Adiciona as expressões faciais em interrogativas globais.
	:param frase: frase
	:param traducao_glosas: frase com algumas regras manuais aplicadas
	:param tags: classes gramaticais das palavras
	:return:
	"""
    if frase[-1] == "?" and not b_any("PT" in x for x in tags) and not b_any(
            "RGI" in x for x in tags):
        traducao_glosas = "{" + traducao_glosas + "}(q)"

    return traducao_glosas
Example #4
    def create_page_rank(self, lambda_value):
        i_vector = dict()
        r_vector = dict()

        list_websites = list(self.graph_bfs)
        # print(list_websites)

        for each_website in list_websites:
            i_vector[each_website] = (1 / len(list_websites))

        diff = 1
        iteration_count = 1
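        # Power iteration: redistribute rank along the collected links each pass until the
        # L2 norm of the change drops below 0.0005 or four iterations have run.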
        while diff > 0.0005 and iteration_count <= 4:
            # print(str(self.graph_bfs['Mars_Rover']))
            diffsum = 0
            for each_website in list_websites:
                r_vector[each_website] = lambda_value / len(list_websites)

            for each_website in list_websites:
                list_inlinks = self.graph_bfs[each_website]
                q = []
                for each_link in list_inlinks:
                    try:
                        if b_any(each_link in x for x in list_websites) \
                                and b_any(each_website in x for x in self.graph_bfs[each_link]):
                            q.append(each_link)
                    except KeyError as e:
                        pass

                if len(q) > 0:
                    for each_outlink in q:
                        r_vector[each_outlink] = r_vector[each_outlink] \
                                                 + ((1 - lambda_value)*(i_vector[each_website]/len(q)))
                else:
                    for each_website_inner in list_websites:
                        r_vector[each_website_inner] = r_vector[each_website_inner] \
                                                 + ((1 - lambda_value)*(i_vector[each_website_inner]/len(list_websites)))

            for each_website in list(r_vector):
                diffsum += ((r_vector[each_website] -
                             i_vector[each_website])**2)

            i_vector = copy.deepcopy(r_vector)
            diff = diffsum**0.5

            iteration_count += 1

        return r_vector
Example #5
def _addTopoInfo(theChainDef,chainDict, topoAlgs, doAtL2AndEF=True):
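    # Find the last signature whose trigger elements are at L2, so the matching L2 and EF
    # input TEs can be handed to the topo chain builders below.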

    maxL2SignatureIndex = -1
    for signatureIndex,signature in enumerate(theChainDef.signatureList):
        if signature['listOfTriggerElements'][0][0:2] == "L2":
            maxL2SignatureIndex = max(maxL2SignatureIndex,signatureIndex)
    
    inputTEsL2 = theChainDef.signatureList[maxL2SignatureIndex]['listOfTriggerElements'] 
    inputTEsEF = theChainDef.signatureList[-1]['listOfTriggerElements']

    #topoAlgs = chainDict["topo"]

    if ('muvtx' in topoAlgs):
       # import pdb;pdb.set_trace()
        theChainDef = generateMuonClusterLLPchain(theChainDef, chainDict, inputTEsL2, inputTEsEF, topoAlgs)
    elif ('revllp' in topoAlgs):
        theChainDef = generateReversedCaloRatioLLPchain(theChainDef, chainDict, inputTEsL2, inputTEsEF, topoAlgs)
    elif ('llp' in topoAlgs):
        theChainDef = generateCaloRatioLLPchain(theChainDef, chainDict, inputTEsL2, inputTEsEF, topoAlgs)
    elif b_any(('invm' in x or 'deta' in x) for x in topoAlgs):
        theChainDef = addDetaInvmTopo(theChainDef,chainDict,inputTEsL2, inputTEsEF, topoAlgs)
    else:
        logJet.error('Your favourite topo configuration is missing.')

    return theChainDef
Example #6
def stageAlgo(types):
    desiredDataTypes = ['double']
    exclude = ['AZS','Tobins.Q','Tobins.Q.class','AZS.class','Feml.CEO.or.Equiv']

    variable_types = {}
    for index, row in types.iterrows():
        if b_any(row["Type"] in x for x in desiredDataTypes):
            if row["Field"] not in exclude:
                variable_types[row["Field"]]="c"

    return (variable_types)
Example #7
def stageAlgo(types,target):
    desiredDataTypes = ['double']
    targetVars = ['AZS','Tobins.Q','Tobins.Q.class','AZS.class']

    variable_types = {}
    for index, row in types.iterrows():
        if b_any(row["Type"] in x for x in desiredDataTypes):
            if row["Field"] not in targetVars:
                variable_types[row["Field"]]="c"
    #add in the target
    variable_types[target]="c"
    return (variable_types)
Example #8
def add_word_objects(step_objs, word_objs):
    """
    Add the word-level objects to the step-level ones.
    :rtype: list
    """
    step_objs = set(step_objs.split(','))
    for obj in word_objs:
        obj = obj.split('>')[0].strip().lower()
        if obj and not obj.endswith(headless_objs) and not b_any(
                obj in x for x in step_objs):
            step_objs.add(obj)
    return list(step_objs)
Example #9
    def __filter(tokens2d, pno):
        new_tokens2d = []
        new_pno = []
        for tokens1d, single_p in zip(tokens2d, pno):
            # here tokens1d is a sentence of word tokens, and each token carries a label.

            # check if any of the labels in this sentence have POSITIVE_LABEL in them, if they do, then consider that
            # sentence, else discard that sentence.

            if b_any(cfg.POSITIVE_LABEL in token.label for token in tokens1d):
                new_tokens2d.append(tokens1d)
                new_pno.append(single_p)

        return new_tokens2d, new_pno
Example #10
    def create_graph_from_map(self):
        list_urls = list(self.graph)

        url_vs_incoming_links = dict()
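        # Invert the adjacency map: for every URL, collect the URLs whose outgoing-link lists
        # mention it (substring match via b_any).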

        for each_url in list_urls:
            in_links = []
            for listed_url in list_urls:
                if b_any(each_url in x for x in self.graph[listed_url]):
                    in_links.append(listed_url)
            url_vs_incoming_links[each_url] = in_links

        print("--------------------Graph--------------------")
        print(url_vs_incoming_links)

        self.write_dictionary_to_file(url_vs_incoming_links)
Example #11
	def __receiveThread(self):
		'Thread that prints incoming messages'
		while self.comPort:
			if self.comPort.inWaiting() > 0:
				c = self.comPort.readline()
				if(self.recuperarMensaje is not None and self.recuperarMensaje in c.decode()): # In case a message needs to be recovered
					self.mensajeRecuperado=c.decode()
				if(b_any( x in c.decode() for x in wordsToPrint)): 
					if(wordsToPrint[6] in c.decode()):
						msg = c.decode().split(",")
						print("\n Mensaje desde Topic: "+msg[1] + "\n	> "+msg[3])
					else:
						print(c.decode())
				if (debug): print("Debugger > " + c.decode())
			else:
				time.sleep(0.1)
Example #12
def move_files(destdir, maindir):
    """
    Consolidates files from multi-zip download into a single directory for
    each subject.
    """

    print(f"Moving files from {maindir} and consolidating into {destdir}\n")
    if not os.path.exists(destdir):
        os.mkdir(destdir)

    # Generate list of full filepaths for each image file
    fileroots = list([os.path.join(root,filename) for root,dirnames,filenames \
                        in os.walk(maindir) if len(root.split('/'))==7 \
                        for filename in filenames])

    # Check if a directory already exists along the filepath to each image,
    # so that images from the same directory are consolidated together.
    # Otherwise, create the directory for the image in the destination dir.
    for filepath in fileroots:
        filedest = list([rootd for rootd,dirnamesd,filenamesd \
                        in os.walk(destdir)])
        for step in range(3, len(filepath.split('/')[:-1])):
            if not b_any('/'.join(filepath.split('/')[3:step+1]).upper()==\
                        '/'.join(dest.split('/')[2:step]).upper() \
                        for dest in filedest if len(dest.split('/'))>=step):
                os.mkdir(os.path.join(destdir,\
                        '/'.join(filepath.split('/')[3:step+1])))
        os.rename(filepath,
                  os.path.join(destdir, '/'.join(filepath.split('/')[3:])))
    print(f'... emptying {maindir} ...\n')

    # Empty the Downloads folder
    while len(
            list([
                dwnld_dir for dwnld_dir in os.listdir(maindir)
                if os.path.isdir(os.path.join(maindir, dwnld_dir))
            ])) > 0:  #len(os.listdir(maindir)) > 2:
        for root, dirnames, filenames in os.walk(maindir):
            if os.path.isfile(os.path.join(root, '.DS_Store')):
                os.remove(os.path.join(root, '.DS_Store'))
            if os.path.isdir(root):
                if not os.listdir(root):
                    os.rmdir(root)
    print(
        f"All files moved from {maindir} and consolidated into {destdir}\n\n")
    return
Example #13
    def filter(self):
        # tokens2d, pos_tags, and conll_deps are filtered if a sentence was not tagged
        new_tokens2d = []
        new_pos_tags = []
        new_conll_deps = []
        for tokens1d, pos_tag1d, deps1d in zip(self.tokens2d, self.pos_tags, self.conll_deps):
            # here tokens1d is a sentence of word sequences, and label is a sequence of labels for a sentence.

            # check if any of the labels in this sentence have POSITIVE_LABEL in them, if they do, then consider that
            # sentence, else discard that sentence.
            if b_any(cfg.POSITIVE_LABEL in token.label for token in tokens1d):
                new_tokens2d.append(tokens1d)
                new_pos_tags.append(pos_tag1d)
                new_conll_deps.append(deps1d)

        self.tokens2d = new_tokens2d
        self.pos_tags = new_pos_tags
        self.conll_deps = new_conll_deps
Example #14
    def getDailyMailArticleLinks(self, topic, page_number):
        links = []
        page_distance = 50
        off_set = until = 0

        # Unlike The Independent, the Daily Mail does not store the topic in the news article, so it must be appended manually
        url_apendix = "|" + topic

        try:
            off_set = (page_number - 1) * page_distance
            if page_number == 1:
                until = page_distance
            else:
                until = ((page_number - 1) * page_distance) * 2

            url = ('https://www.dailymail.co.uk/home/search.html?offset={}&size={}&sel=site&searchPhrase={}&sort=recent&type=article&type=video&type=permabox&days=all'.format(off_set, until, topic))

            # Get the html for the page
            response = requests.get(url)
            soup = BeautifulSoup(response.text, 'lxml')

            # 1. Get the articles div
            articles_div = soup.find('div',attrs={'class':'sch-results'})

            # Get all of the divs that contain links
            article_divs = articles_div.findAll('div')

            # 2. The first 2 and the last article divs do not contain links
            for article in article_divs[2:-1]:
                for url in article.findAll('a'):
                    if '#' not in url.get('href'):
                        # Ensure the link has not already been added somehow
                        if not b_any(url.get('href') in link for link in links):
                            # print('https://www.dailymail.co.uk/' + url.get('href'))
                            links.append('https://www.dailymail.co.uk/' + url.get('href') + url_apendix)


        except AttributeError:
            print('Failed to find what we looked for')
            print('Sleeping...')
            time.sleep(10)

        return links  
Example #15
def search(files, mod, tag, search):
    """
    Return a dictionary of files containing the specific tag,
    as well as the tags that exist in those files for additional
    context to be provided to the user.
    """

    # construct relevant pieces
    search_construct = mod + search

    tags_per_file = dict()

    for file in files:
        with open(file, 'r') as fl:
            contents = fl.read()
            # get all tags in a file
            taglist = set(re.findall(search_construct, contents))
            # strip modifier from all tags
            taglist_stripped = [tag.strip(mod) for tag in taglist]
            # substring match our tag against the items in taglist_stripped
            if b_any(tag in tags for tags in taglist_stripped):
                # file contains a match, store all tags for context
                tags_per_file[file] = taglist_stripped
    return tags_per_file
Example #16
def fetch(region):
    print('>Fetching {0}'.format(region))

    url = 'https://www.wowhead.com/world-quests/{0}'.format(region)

    req = Request(url)
    try:
        urlcleanup()
        response = urlopen(req)
    except URLError as e:
        if hasattr(e, 'reason'):
            print('   We failed to reach a server.')
            print('   Reason: ', e.reason)
        elif hasattr(e, 'code'):
            print('    The server couldn\'t fulfill the request.')
            print('    Error code: ', e.code)
        return None, None
    else:
        html = response.read()
        soup = BeautifulSoup(html, 'html.parser')

        # get text
        text = soup(["script"])

        for tag in text:
            if b_any('lvWorldQuests' in word for word in tag.contents):
                for line in tag.contents:
                    moreLines = line.split('\n')
                    wqLines = list(
                        filter(lambda x: 'lvWorldQuests' in x, moreLines))
                    for wqLine in wqLines:
                        _lines = wqLine.split(', data: [')
                        quests = json.loads('[' + _lines[1][:-4] + ']')
                        return quests

        return 'worldQuest'
Example #17
def get_sites(permissions):
    site_slugs = []
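    # Map keywords found in the permission strings (substring match) to two-letter site slugs.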
    if b_any('scribd' in x for x in permissions):
        site_slugs.append('SC')

    if b_any('google' in x.lower() for x in permissions):
        site_slugs.append('GB')

    if b_any('kobo' in x.lower() for x in permissions):
        site_slugs.append('KO')

    if b_any('test' in x.lower() for x in permissions):
        site_slugs.append('TB')

    if b_any('livraria' in x.lower() for x in permissions):
        site_slugs.append('LC')

    if b_any('audio' in x.lower() for x in permissions):
        site_slugs.append('AU')
    return site_slugs
Example #18
def parser(ingredient, food_array):
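    # Strip quantities, units, brands, and descriptor words from the ingredient string; the
    # remaining food words are appended to food_array.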
    if type(ingredient) != str:
        return

    parsed_word = ''

    # Removes unnecessary special characters
    ingredient = ingredient.strip()
    ingredient = ingredient.replace('-', ' ')
    ingredient = ingredient.replace('+', ' ')
    ingredient = ingredient.replace(':', ' ')
    ingredient = ingredient.replace(';', ' ')
    ingredient = ingredient.replace('/', ' ')
    ingredient = ingredient.replace('\'', ' ')
    ingredient = ingredient.replace('\"', ' ')
    ingredient = ingredient.replace('%', ' ')
    ingredient = ingredient.replace('.', ' ')
    ingredient = ingredient.replace('&', ' ')
    ingredient = ingredient.replace('[', ' ')
    ingredient = ingredient.replace(']', ' ')
    ingredient = ingredient.replace('®', '')
    ingredient = ingredient.replace('\u2009', ' ')
    # Breaks each word into a string array
    split_item = ingredient.split(" ")
    # print(split_item)
    for word in split_item:
        word = word.lower()
        # Takes care of whole numbers, decimals, and fractions
        if word.isnumeric() or word.isdecimal():
            continue
        elif b_any(word in x for x in description_exceptions):
            parsed_word = word + ' '
            continue
        elif word in nonplurals:
            word = word[:-1]
            parsed_word = parsed_word + word + ' '
            continue
        elif ',' in word:
            last_word = word.replace(',', '')
            if last_word in nonplurals:
                last_word = last_word[:-1]
            parsed_word = parsed_word + last_word
            break
        elif word == 'or':
            break
        elif word == 'and':
            parsed_word = parsed_word.rstrip()
            # food_array.append(parsed_word)
            parsed_word = ''
            continue
        elif '(' in word or ')' in word:
            continue
        elif b_any(word in x for x in vulgarFractions):
            continue
        elif b_any(word in x for x in measurementUnits):
            continue
        elif b_any(word in x for x in measurementUnitsAbbreviations):
            continue
        elif b_any(word in x for x in numbers):
            continue
        elif b_any(word in x for x in brands):
            continue
        elif b_any(word in x for x in descriptions):
            continue
        elif b_any(word in x for x in modifier):
            continue
        elif b_any(word in x for x in precedingAdverbs):
            continue
        elif b_any(word in x for x in succeedingAdverbs):
            continue
        elif b_any(word in x for x in prepositions):
            continue
        elif b_any(word in x for x in descriptionsWithPredecessor):
            continue
        elif b_any(word in x for x in unnecessaryDescriptions):
            continue
        elif b_any(word in x for x in hypenatedPrefixes):
            continue
        elif b_any(word in x for x in hypenatedSuffixes):
            continue
        else:
            parsed_word = parsed_word + word + ' '

    parsed_word = parsed_word.strip()
    #return parsed_word
    #print(parsed_word)
    # Prevents blank spots in the ingredients array
    if parsed_word == '':
        return food_array
    else:
        food_array.append(parsed_word)
        return food_array
Example #19
    def func_depd_pruning(self):
        '''
        Function for determining functional dependencies using the naive approach.
        :return: None
        '''
        input = ['movieid', 'type', 'startyear', 'runtime', 'avgrating', 'genreid', 'genre', 'memberid', 'birthyear',
                 'role']

        output = sum([list(map(list, combinations(input, i))) for i in range(3)], [])
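        # Candidate determinants: every subset of the attribute list of size 0, 1 or 2
        # (the empty subset is removed just below).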

        output.pop(0)  # deleting the empty set

        concat_list = [', '.join(sub_list) for sub_list in output]


        table_list=[]
        table_dict={}

        for column in concat_list:
            column_list = []
            query = "SELECT array_agg(nid) FROM normalization GROUP BY "+ column +" order by " + column
            self.cursor.execute(query)
            arrays = self.cursor.fetchall()
            for array in arrays:
                array=str(array)
                array = array.strip('()[],')
                column_list.append(array.translate(str.maketrans('', '', '()[]')).split(', '))
            table_list.append(column_list)


        table_dict.clear()


        for i in range(0,len(concat_list)):
            table_dict[concat_list[i]] = table_list[i]

        func_depd=[]

        for left_col in table_dict.keys():
            for right_col in input:
                count=0
                lolleft=table_dict[left_col]
                lolright=table_dict[right_col]
                for left_list in lolleft:
                    for right_list in lolright:
                        if set(left_list) <= set(right_list):
                            count+=1
                            break
                        else:
                            continue
                if count == len(lolleft):
                    leftcollist=left_col.split(", ")
                    someflag=True
                    for col in leftcollist:
                        if col.strip(" ") != right_col.strip(" "):
                            continue
                        else:
                            someflag=False
                    if someflag:
                        word = "-->" + str(right_col)
                        if not b_any(word in x for x in func_depd):
                            func_depd.append(left_col + "-->"+ right_col)


        print(func_depd)
Example #20
def main(*args):
    stock_path = 'Stock_Data'

    if args[0] == 1:
        rmtree(stock_path)

    if not os.path.exists(stock_path):
        os.makedirs(stock_path)

    start = str(datetime.date.today() -
                datetime.timedelta(days=int(args[1] * 365)))
    end = str(datetime.date.today())

    tickers = pd.read_csv(args[2])

    onlyfiles = [f for f in listdir(stock_path) if isfile(join(stock_path, f))]
    Doneticks = [i.split('.csv', 1)[0] for i in onlyfiles]

    skipticks = []

    for tick, comp, divi, mc in zip(tickers['Symbol'], tickers['Name'],
                                    tickers['Dividend'],
                                    tickers['Market Cap, Billions']):
        f_str = tick + '__' + comp
        if not b_any(f_str in x for x in Doneticks):
            try:
                df = pdr.get_data_yahoo(tick, start, end)
                if len(df) > 0:
                    df = df.reset_index()
                    df['Count'] = df.index
                    df['Week Day'] = -1
                    df['Change'] = ((df['Close'] / df['Open']) - 1) * 100
                    for index, row in df.iterrows():
                        date = row['Date']
                        wk_day = date.weekday()
                        df.at[index, 'Week Day'] = wk_day

                    period = 14
                    delta = df['Close'].diff()
                    dUp, dDown = delta.copy(), delta.copy()
                    dUp[dUp < 0] = 0
                    dDown[dDown > 0] = 0
                    RS = dUp.rolling(period).mean() / dDown.rolling(
                        period).mean().abs()
                    df['RSI'] = 100.0 - (100.0 / (1.0 + RS))
                    df = df.fillna(df['Close'].min())
                    fname = stock_path + '/' + tick + '__' + comp + '.csv'
                    df.to_csv(fname, sep=',', encoding='utf-8', index=False)
                else:
                    tickers = tickers[tickers.Symbol != tick]

            except Exception:
                skipticks.append(tick)
                pass
    tickers.to_csv(args[2][:-4] + '-new.csv',
                   sep=',',
                   encoding='utf-8',
                   index=False)  # saves out csv with delisted tickers
    if len(listdir(stock_path)) < 7000:
        print(
            f'Missing some stocks (returned only {len(listdir(stock_path))} when there is likely more)'
        )
Example #21
def does_sent_have_tags(labels):
    return b_any('B' in x or 'I' in x for x in labels)
Example #22
# rsi_df['Below 30']=downs

# rsi_df1=rsi_df[rsi_df['Above 70']>0]
# rsi_df2=rsi_df1[rsi_df1['Below 30']>0]
# rsi_df3=rsi_df2[rsi_df2['Total']>50]

     
Files_for_plotting= listdir(stock_path)
Files_for_plotting = Files_for_plotting[1:3]
for ff in Files_for_plotting:
    t_c=ff.split('.csv', 1)[0]
    tick=t_c.split('__', 1)[0]
    company=t_c.split('__', 1)[1]

    check=tick + '_'+company
    if not b_any(check in x for x in Doneticks_plots):
    
        csvname = stock_path+'/'+ff
        df_data=pd.read_csv(csvname, encoding="ISO-8859-1")
        x1=df_data['Count'].reset_index()
        x1=x1+(1260-np.max(x1))
        y1=df_data['Close'].reset_index()
        x1=x1.drop(['index'], axis=1)
        y1=y1.drop(['index'], axis=1)

        
        if len(x1)>30:
            x2=x1.tail(520)
            x3=x1.tail(260)
            x4=x1.tail(195)
            x5=x1.tail(130)
Example #23
def create_and_store_lyrics(sp, artist_name=None, artist_dic=None):
    # this is for scraping genius.com

    dictionary = enchant.Dict("en_US")

    albums = sp.artist_albums(artist_id=artist_name, limit=50)

    songs = []

    for i in range(len(albums['items'])):
        album_uri = albums['items'][i]['uri']
        album_tracks = sp.album_tracks(album_uri)

        for j in range(len(album_tracks['items'])):
            album_song = album_tracks['items'][j]['name']
            songs.append(album_song)

    global_list = []

    for title in songs:
        j = title.split()
        local_list = []
        for word in j:
            if word.isalnum() and dictionary.check(word):
                local_list.append(word.lower())

        global_list.append('-'.join(local_list))

    res = []

    for i in range(len(global_list)):
        try:
            r = requests.get('https://genius.com/Genius-translations-' +
                             dic[artist_dic] + '-' + global_list[i] +
                             '-english-translation-lyrics')
            soup = BeautifulSoup(r.text, 'lxml')
            lyrics = soup.find('div', class_='lyrics').get_text()
            if [global_list[i], lyrics] not in res:
                res.append([global_list[i], lyrics])
        except Exception as e:

            r = requests.get('https://genius.com/' + dic[artist_dic].title() +
                             '-' + global_list[i] + '-lyrics')
            soup = BeautifulSoup(r.text, 'lxml')
            try:
                lyrics = soup.find('div', class_='lyrics').get_text()
                if 'English Translation' in lyrics:

                    lyrics_split = lyrics.split('English Translation')

                    search_list = [
                        'Japanese Translation', 'Romanized', 'Korean Original',
                        'Hangul', 'French Translation', 'Romanization',
                        'Chinese Translation', 'Original', 'Chinese',
                        'Japanese', 'French', 'Korean'
                    ]

                    s = lyrics.split('English Translation')[1].split()

                    for index in range(len(s)):
                        next_index = index + 1
                        if b_any(substring in lyrics
                                 for substring in search_list):
                            while s[next_index] not in search_list and next_index < len(
                                    s) - 1:
                                next_index += 1

                            english_lyrics = ' '.join(s[index:next_index])
                            break

                        else:
                            english_lyrics = ' '.join(s)

                    if [global_list[i], english_lyrics] not in res:
                        res.append([global_list[i], english_lyrics])

            except Exception as e:
                pass

    df = pd.DataFrame(res).to_csv('lyrics_data/' + dic[artist_dic] +
                                  '_translated_lyrics.csv')
Example #24
# Acronym
# Robyn Lesch
# 17 June 2020
# Mood: exhausted

ignoreStrings = input("Enter words to be ignored separated by commas: \n")
ignoreList = ignoreStrings.split(",")
title = input("Enter a title to generate its acronym: \n")
titleList = title.split()

from builtins import any as b_any
print("The acronym is:")
for x in titleList:
    if (b_any(x.lower() in y.lower() for y in ignoreList)):
        print("", end="")
    else:
        print(x[0].upper(), end="")
Example #25
def create_and_store_lyrics(sp, artist_name=None, artist_dic=None):
    # this is for scraping genius.com

    dictionary = enchant.Dict("en_US")

    albums = sp.artist_albums(artist_id=artist_name, limit=50)

    songs = []

    for i in range(len(albums['items'])):
        album_uri = albums['items'][i]['uri']
        album_tracks = sp.album_tracks(album_uri)

        for j in range(len(album_tracks['items'])):
            album_song = album_tracks['items'][j]['name']
            songs.append(album_song)

    global_list = []

    for title in songs:
        j = title.split()
        local_list = []
        for word in j:
            if word.isalnum() and dictionary.check(word):
                local_list.append(word.lower())

        global_list.append('-'.join(local_list))

    res = []

    for i in range(len(global_list)):
        try:
            r = requests.get('https://genius.com/' + dic[artist_dic] + '-' +
                             global_list[i] + '-lyrics')
            soup = BeautifulSoup(r.text, 'lxml')
            lyrics = soup.find('div', class_='lyrics').get_text().lower()

            split_list = ['hangul', 'korean', 'korean original']

            search_list = [
                'japanese', 'romanized', 'french', 'romanization', 'original',
                'chinese', 'japanese', 'english', 'translation'
            ]

            total_list = split_list + search_list

            if not b_any(
                    substring in lyrics for substring in total_list
            ):  # 1) if this is just pure Korean text. Sometimes there will be translated lyrics without explicit mention of translations, resulting in these lyrics being stored.
                if [global_list[i], lyrics] not in res:
                    res.append([global_list[i],
                                lyrics])  # just append the Korean text

            else:
                if any(
                        substring in lyrics for substring in split_list
                ):  # 2) if this has Korean lyrics mixed with other translations:
                    split_string = [
                        i for i in filter(lambda x: x in lyrics, split_list)
                    ][0]

                    s = lyrics.split(split_string)[1].split()
                    for index in range(len(s)):
                        next_index = index + 1

                        if b_any(substring in lyrics
                                 for substring in search_list):
                            while s[next_index] not in search_list and next_index < len(
                                    s) - 1:
                                next_index += 1

                            korean_lyrics = ' '.join(
                                s[index:next_index]
                            )  # use sliding window to find the end of korean lyrics through key words in search_list and extract the korean lyrics
                            break

                    if [
                            global_list[i], korean_lyrics
                    ] not in res:  # append the found Korean lyrics to the result list
                        res.append([global_list[i], korean_lyrics])

                else:  # 3) if these are foreign translations without Korean words, just drop it
                    pass

        except Exception as e:  # if the link itself doesn't work, just drop the song
            pass

    df = pd.DataFrame(res).to_csv('../lyrics_data/Korean_lyrics/' +
                                  dic[artist_dic] + '_original_lyrics.csv')
Example #26
    def create_dict_list_with_key(n: int, value_input: np.ndarray,
                                  in_key: Union[str, list]) -> list:
        dict_list = []
        for i in range(0, n):  # iterate over samples
            param_dict = dict()
            for j in range(
                    0, get_dimension(in_key)):  # iterate over input parameters
                # if key is nested key, extract just the last bit to find the type
                tmp_parts = str.split(in_key[j], '.')

                bool_set_key = True

                if len(tmp_parts) > 1:
                    key = tmp_parts[-1]  # last entry
                else:
                    key = in_key[j]

                if b_any(key in x for x in INTEGER_PARAMETERS
                         ):  # self.key[j] in INTEGER_PARAMETERS:
                    value = int(np.round(value_input[j, i]))
                elif b_any(key in x for x in LIST_PARAMETERS):
                    value = [value_input[j, i]]  # convert to list
                elif b_any(key in x for x in GROUP_PARAMETERS):
                    value = [
                        1 - value_input[j, i], 1 - value_input[j, i]
                    ]  # convert to list with percentage of singles and couples
                elif b_any(key in x for x in TOPOGRAPHY_SYMMETRY_PARAMETERS):
                    # todo: special file or sth for this config that is only for one specific scenario file
                    # vertical center: 7.5
                    v_center_line = 7.5
                    obstacle_height = 6.0
                    my_key = "obstacles.[id==1].y"  # lower obstacle (Liddle_bhm_v3)
                    my_value = -value_input[
                        j, i] / 2 + v_center_line - obstacle_height
                    param_dict[my_key] = my_value

                    my_key = "obstacles.[id==2].y"  # upper obstacle
                    my_value = value_input[j, i] / 2 + v_center_line
                    param_dict[my_key] = my_value

                    distance_to_obstacle = 0.3  # distance between intermediate target and obstacle
                    height_of_obstacle = 10

                    # for intermediate target
                    # my_key = "targets.[id==1].height"
                    # my_value = value_input[j, i]
                    # target_1_height = my_value - distance_to_obstacle * 2
                    # param_dict[my_key] = target_1_height
                    #
                    # my_key = "targets.[id==1].y"  # intermediate target
                    # my_value = v_center_line - target_1_height / 2
                    # param_dict[my_key] = my_value

                    my_key = "targets.[id==1].height"
                    target_2_height = value_input[j, i]
                    param_dict[my_key] = target_2_height

                    my_key = "targets.[id==1].y"  # target
                    my_value = v_center_line - target_2_height / 2
                    param_dict[my_key] = my_value

                    bool_set_key = False

                else:
                    value = value_input[j, i]

                if bool_set_key:
                    param_dict[in_key[j]] = value

            dict_list.append(param_dict)
        return dict_list
Example #27
maxlen = 25
char_idx = None
if os.path.isfile(char_idx_file):
    print('Loading previous char_idx')
    char_idx = pickle.load(open(char_idx_file, 'rb'))

X, Y, char_idx = \
    textfile_to_semi_redundant_sequences(
        path, seq_maxlen=maxlen, redun_step=1)
pickle.dump(char_idx, open(char_idx_file, 'wb'))

# Instantiating checkpoint finder
checkpoint = False
list_of_files = os.listdir()
checkpoint_type = ".data-00000-of-00001"
if b_any(checkpoint_type in x for x in list_of_files):
    checkpoint = True

    def extract_number(f):
        s = re.findall("(\d+).data-00000-of-00001", f)
        return (int(s[0]) if s else -1, f)

    target = (max(list_of_files, key=extract_number))
    target = target.split('.')
    target = target[0]

# Begin Main loop
with tf.device('/cpu:0'):
    # Launch tensorboard (This is disabled as it causes Python to crash)
    #os.spawnl(os.P_NOWAIT, "tensorboard --logdir='/tmp/tflearn_logs/" + ID + "'")
    #os.spawnl(os.P_NOWAIT, "start \"\" http://localhost:6006")
Example #28
    def func_depd_pruning(self):
        '''
        Function for determining functional dependencies using the pruning approach. This function determines functional
        dependencies for the company table as a sample. We have run the same code for the other tables in our model and the results are documented in the write-up.
        :return: None
        '''
        input = ['ticker', 'exchange', 'company_name', 'sector', 'industry']

        output = sum([list(map(list, combinations(input, i))) for i in range(3)], [])

        output.pop(0)  # deleting the empty set

        concat_list = [', '.join(sub_list) for sub_list in output]

        table_list = []
        table_dict = {}

        for column in concat_list:
            column_list = []
            query = "SELECT array_agg(ticker) FROM company GROUP BY " + column + " order by " + column
            self.cursor.execute(query)
            arrays = self.cursor.fetchall()
            for array in arrays:
                array = str(array)
                array = array.strip('()[],')
                column_list.append(array.translate(str.maketrans('', '', '()[]')).split(', '))
            table_list.append(column_list)

        table_dict.clear()

        for i in range(0, len(concat_list)):
            table_dict[concat_list[i]] = table_list[i]

        func_depd = []

        for left_col in table_dict.keys():
            for right_col in input:
                count = 0
                lolleft = table_dict[left_col]
                lolright = table_dict[right_col]
                for left_list in lolleft:
                    for right_list in lolright:
                        if set(left_list) <= set(right_list):
                            count += 1
                            break
                        else:
                            continue
                if count == len(lolleft):
                    leftcollist = left_col.split(", ")
                    someflag = True
                    for col in leftcollist:
                        if col.strip(" ") != right_col.strip(" "):
                            continue
                        else:
                            someflag = False
                    if someflag:
                        word = "-->" + str(right_col)
                        if not b_any(word in x for x in func_depd):
                            func_depd.append(left_col + "-->" + right_col)

        print(func_depd)
Example #29
for lineNum, line in enumerate(data,1):
    currLine = []
    currLine = line.split()
    for wordNum, word in enumerate(currLine,1):
        if not word.isdigit():
            if word not in en:
                if word not in lib.keys():    
                    stemmed = stemmer.stem(word)
                    if stemmed not in lib.keys():
                        #print(stemmed)
                        typo[stemmed] = Typo(lineNum,wordNum,word)

for word in typo.keys():
    print(word)
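    # Bracket the error: find where the word stops matching any dictionary prefix (front)
    # and any dictionary suffix (rear).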
    for x in range(0, len(word)):
        if not b_any(i.startswith(word[0:x]) for i in lib.keys()):
            typo[word].setErrorIndexFront(x)
            #print(word[0:x])
            #print(word[0:typo[word].getErrorIndexFront() - 1])
            break

    for x in range(len(word) - 2, 0, -1):
        if not b_any(i.endswith(word[x:len(word)]) for i in lib.keys()):
            typo[word].setErrorIndexRear(x)
            #print(word[x:len(word)])
            #print(word[typo[word].getErrorIndexRear()+1:len(word)])
            break

    for sug in lib.keys():
        if abs(len(sug) - len(word))<2:
            if sug.startswith(word[0:typo[word].getErrorIndexFront() - 2]):
Example #30
def find_files(in_path, ext, targets, template=r'(?<=\d{2})\d{5}', sub=False):
    """
    Finds matching files with extension ext and returns them in
    the order of the targets list given as argument
    Returns a dictionary identical to what I was using before
    Also drops duplicates
    """
    # Go through each directory and see if I can find the subjects I am looking
    # for
    ext = '*{}'.format(ext)
    out_dict = {key: [] for key in ['sub_name', 'dir', 'path']}
   
    if not sub:
        sub_dirs = [d for d in next(os.walk(in_path))[1]]

        for sub_dir in sub_dirs:
            tmp_dir = os.path.join(in_path, sub_dir)
            in_files = glob.glob(os.path.join(tmp_dir, ext))
            tmp_dict = dict()

            # Get the files that we have
            matches = [x for x in targets if b_any(str(x) in t for t in in_files)]

            for in_file in in_files:
                sub_name = os.path.basename(in_file.split('.')[0])
                sub_id = int(re.search(r'{}'.format(template), sub_name).group())
                if sub_id in tmp_dict.keys():
                    # This is a duplicate
                    continue
                tmp_dict[sub_id] = (sub_name, in_file)

            # Re-sort the path info
            sort_list = list()
            for target in matches:
                sub_name, in_file = tmp_dict[target]
                out_dict['sub_name'].append(sub_name)
                out_dict['dir'].append(sub_dir)
                out_dict['path'].append(in_file)
    else:
        sub_dir = sub
        tmp_dir = os.path.join(in_path, sub_dir)
        in_files = glob.glob(os.path.join(tmp_dir, ext))
        tmp_dict = dict()

        # Get the files that we have
        matches = [x for x in targets if b_any(str(x) in t for t in in_files)]

        for in_file in in_files:
            sub_name = os.path.basename(in_file.split('.')[0])
            sub_id = int(re.search(r'{}'.format(template), sub_name).group())
            if sub_id in tmp_dict.keys():
                # This is a duplicate
                continue
            tmp_dict[sub_id] = (sub_name, in_file)

        for target in matches:
            sub_name, in_file = tmp_dict[target]
            out_dict['sub_name'].append(sub_name)
            out_dict['dir'].append(sub_dir)
            out_dict['path'].append(in_file)
    return out_dict