Python setload Examples

Programming Language: Python

Namespace/Package Name: mxproxy.mx

Method/Function: setload

Examples at hotexamples.com: 5

Python setload - 5 examples found. These are the top-rated real-world Python examples of mxproxy.mx.setload, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

    def __init__(self, url):
        """Build the per-domain crawl state for ``url`` and visit it as the seed.

        Derives the base URL, the domain and the link-matching pattern from
        ``url``, computes the profile paths for that domain, fills the
        explored/visited URL memories through ``mx.setload`` and finally
        calls ``self.visit(url)``.

        Args:
            url: Seed URL. It must split into at least three '/'-separated
                parts (scheme, empty part, host); otherwise reading the
                domain raises ``IndexError``.
        """
        # --- seed variables ---
        self.url = url
        pieces = url.split('/')
        self.baseurl = '/'.join(pieces[:3])
        self.domain = pieces[2]
        self.regex = '{}/.*'.format(self.baseurl)
        print(self.regex)

        # --- init and setup variables (profile layout) ---
        self.programName = 'hyperScraper'
        self.profileFolder = self.programName + "_profiles"
        profile_root = './' + self.profileFolder + '/' + self.domain + '/'
        self.currentDomainPath = profile_root
        self.domainDataFolder = profile_root + "data/"
        self.domainExploreFile = profile_root + 'explored.url'
        self.domainVisitFile = profile_root + 'visited.url'
        # Called only once the path attributes exist, in case the helper
        # reads them while making the profile for this domain.
        self.profile_check_make(self.domain)

        # --- runtime variables ---
        self.exploredURLMemory = explored = mx.setload(self.domainExploreFile)
        self.visitedURLMemory = visited = mx.setload(self.domainVisitFile)
        # Explored-but-not-visited URLs. The subtraction presumably relies on
        # mx.setload returning sets -- confirm against mxproxy.
        self.pendingURLMemory = list(explored - visited)

        # --- initial seed ---
        self.visit(url)  # process the seed url and build links

Example #2

Show file

File: hyperScraper.py Project: shirish-01/DataCake

 def __init__(self, url, myregex=r'/.*'):
     """Prepare the crawl state for the domain of ``url`` and visit the seed.

     Args:
         url: Seed URL; it must split into at least three '/'-separated
             parts, otherwise reading the domain raises ``IndexError``.
         myregex: Pattern suffix appended to the base URL to form
             ``self.regex``.

     Raises:
         Exception: whatever ``self.visit(url)`` raises is re-raised after
             an error line has been printed.
     """
     # Seed variables
     self.url = url  # kept exactly as passed in
     pieces = url.split("/")
     self.domain = pieces[2]  # e.g. www.asdasd.com
     # Everything after the first dot of the domain (empty if there is none).
     self.topLevelDomain = self.domain.partition('.')[2]
     self.baseurl = '/'.join(pieces[:3])  # e.g. https://www.asdasd.com
     self.regex = self.baseurl + myregex
     print("LOG: regex url matching pattern is >>", self.regex)

     # Init and setup variables (profile layout)
     self.programName = 'hyperScraper'
     self.profileFolder = self.programName + "_profiles/"
     domain_root = self.profileFolder + self.domain + '/'
     self.currentDomainPath = domain_root
     self.domainDataFolder = domain_root + "data/"
     self.domainExploreFile = domain_root + 'explored.url'
     self.domainVisitFile = domain_root + 'visited.url'
     self.profile_check_make(self.domain)  # make a profile for the domain

     # Runtime variables
     self.exploredURLMemory = explored = mx.setload(self.domainExploreFile)
     self.visitedURLMemory = visited = mx.setload(self.domainVisitFile)
     # Explored-but-not-visited URLs; presumably mx.setload returns sets,
     # which the subtraction needs -- confirm against mxproxy.
     self.pendingURLMemory = list(explored - visited)

     # Initial seed: visit the seed url and build links
     try:
         self.visit(url)
     except Exception as err:
         print(
             "ERROR: initial Seed encountered serious error change to {raise e} in this line to track it"
         )
         raise err

Example #3

Show file

File: auto_dict.py Project: shirish-01/DataCake

# Storage locations for the lexico index and dictionary. Each path is handed
# to mx.touch right after it is defined (presumably so the file exists before
# it is loaded -- confirm against mxproxy).
lexico_index_path = './Dictionary/lexico_index.set'
mx.touch(lexico_index_path)
lexico_dict_path = './Dictionary/lexico_dict.jsonl'
mx.touch(lexico_dict_path)

#NLP---------------------------------------
def get_lemma(WORD):
	"""Return the lower-cased lemma of the first token of WORD.

	The spaCy pipeline is created on first use and kept on
	``Cache.nlpengine`` so later calls reuse it.

	Args:
		WORD: Text to lemmatise; only its first token is looked at.

	Returns:
		The first token's lemma in lower case, or the token's own
		lower-cased text when the lemma is the ``'-PRON-'`` placeholder.
	"""
	from spacy import load as spacy_load
	try:
		nlp = Cache.nlpengine
	except AttributeError:
		# Only a missing attribute means "not loaded yet". A bare except
		# here would also swallow KeyboardInterrupt/SystemExit and hide
		# unrelated errors.
		nlp = Cache.nlpengine = spacy_load('en_core_web_sm', disable=['parser', 'ner', 'tagger', 'textcat'])
	token = nlp(WORD)[0]
	return token.lemma_.lower() if token.lemma_ != '-PRON-' else token.lower_

#CACHE LAYERS------------------------------
# Populated once, when this module-level code runs, and shared through Cache.
Cache.base_dict = mx.jload(basedict_path)
Cache.lexico_index = mx.setload(lexico_index_path)
Cache.lexico_dict = mx.jloadlines(lexico_dict_path)

#CORE--------------------------------------
def get_definition(WORD):
	"""Look WORD up in ``Cache.base_dict`` and split its entry into parts.

	Args:
		WORD: Key to look up in the base dictionary.

	Returns:
		A list of the substrings of the entry that start at a digit and run
		up to and including the next ';'. When there are none, a one-element
		list holding the entry itself ('' if WORD is missing or its entry is
		falsy).
	"""
	entry = Cache.base_dict.get(WORD) or ''
	numbered_parts = re.findall(r'[\d].*?;', entry)
	return numbered_parts if numbered_parts else [entry]
#--------------------------------------
def lexico_fetch(WORD): #lower Level
	WORD=get_lemma(WORD)
	while True:

Example #4

Show file

File: auto_dict.py Project: shirish-01/DataCake

def seed_dict_with_words():
	"""Seed the dictionary with the lemmas of the words in 3000common.set.

	Every word from './Dictionary/3000common.set' is lower-cased and handed
	to ``get_lemma`` through ``POOL.apply_async``. The collected results are
	then passed to ``add_words_to_dictionary`` as one set.
	"""
	common_words = mx.setload('./Dictionary/3000common.set')
	# Submit all jobs first, then collect, so the pool can work on them together.
	pending_jobs = {POOL.apply_async(get_lemma, (word.lower(),)) for word in common_words}
	lemmas = {job.get() for job in pending_jobs}
	add_words_to_dictionary(lemmas)

Example #5

Show file

import os
from mxproxy import mx
import re


def get_body(url, bodyclass=''):
    """Fetch ``url`` and return the first element carrying class ``bodyclass``.

    Args:
        url: Address passed to ``mx.get_page``.
        bodyclass: Class name to search for; must be non-empty.

    Returns:
        Whatever ``.find(class_=bodyclass)`` yields on the fetched page, or
        ``None`` (after printing a hint) when ``bodyclass`` is empty.
    """
    if not bodyclass:
        print('define bodyclass as parameter of this function')
        return None
    fetched = mx.get_page(url)
    return fetched.find(class_=bodyclass)


if __name__ == '__main__':
    # Append the text of every not-yet-visited link found on the index page
    # to one file, and record each processed link in visited.url.
    SUBJECTFOLDER = 'SEM-5/DIP/'
    VISITED_FILE = SUBJECTFOLDER + 'visited.url'
    # Resolves to 'SEM-5/DIP/BigData.txt'; derived from SUBJECTFOLDER so the
    # output follows the folder when it is changed.
    OUTPUT_FILE = SUBJECTFOLDER + 'BigData.txt'
    url = 'https://www.sanfoundry.com/1000-digital-image-processing-questions-answers/'
    page = mx.get_page_soup(url)

    # Anchors with no (or an empty) href are skipped; indexing them with
    # ['href'] would raise KeyError and abort the run.
    links = {
        a['href'] for a in page.select('.entry-content a') if a.get('href')
    }

    mx.touch(VISITED_FILE)
    visited = mx.setload(VISITED_FILE)

    pendinglinks = links - visited
    for x in pendinglinks:
        body = mx.get_page_soup(x).select_one('.entry-content')
        if body is None:
            # No '.entry-content' element on this page: skip it instead of
            # failing on None.text. It is not recorded as visited, so a
            # later run will try it again.
            print('no .entry-content found, skipping:', x)
            continue
        mx.fappend(OUTPUT_FILE, body.text)
        mx.fappend(VISITED_FILE, x)