Example #1
    def __init__(self, dataSource, dataSet):
        # Dispatch to the Extract method that matches the source type.
        extractObj = Extract()
        if dataSource == 'api':
            self.data = extractObj.getAPISData(dataSet)
        elif dataSource == 'csv':
            self.data = extractObj.getCSVData(dataSet)
        else:
            self.data = extractObj.databases(dataSet)
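The if/elif chain above dispatches on the source keyword. The same pattern can be written as a lookup table; a self-contained sketch, where the loader functions are hypothetical stand-ins for the Extract methods:

def load_api(dataset):
    return 'api:' + dataset

def load_csv(dataset):
    return 'csv:' + dataset

def load_db(dataset):
    return 'db:' + dataset

# Unknown sources fall back to the database loader, mirroring the else branch.
HANDLERS = {'api': load_api, 'csv': load_csv}

def load(source, dataset):
    return HANDLERS.get(source, load_db)(dataset)

print(load('csv', 'sales.csv'))  # csv:sales.csv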
Example #2
    def default(self, url1=None, url2=None, field=None):
        if not url1 or not url2 or not field:
            return dumps({'status': 'error', 'message': 'Need URIs and field'})
        extract1 = Extract(url1, datapath, cachepath, extractorpath)
        extract2 = Extract(url2, datapath, cachepath, extractorpath)
        data1 = extract1.run()
        data2 = extract2.run()
        chart = ComparisonChart(url1, url2, data1, data2)
        url = chart.get_graph_url_for_field(field)
        return dumps({'status': 'ok', 'url': url})
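Examples #2, #4, and #9 share one shape: a default handler that validates its query parameters and returns a JSON string. The signature suggests CherryPy's default dispatcher; a minimal runnable sketch under that assumption (the class name and payload are illustrative):

import cherrypy
from json import dumps

class Api:
    @cherrypy.expose
    def default(self, url=None):
        # Validate the query parameter before doing any work.
        if not url:
            return dumps({'status': 'error', 'message': 'Need URL'})
        # A real handler would run an Extract pipeline here.
        return dumps({'status': 'ok', 'data': {'url': url}})

if __name__ == '__main__':
    cherrypy.quickstart(Api())  # default() receives unmatched paths' query params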
Example #3
    def process_image(self, image):
        file_converter = FileConverter()
        # convert the image to a byte string
        image_bytes = file_converter.png_to_jpeg(image)

        scanner = Scanner()
        # scan the image and warp it to a bird's-eye view; returns a NumPy array of the pixels that make up the image
        scan_np = scanner.scan(image_bytes)

        # extract the individual answers from the scanned test
        extractor = Extract()
        answers = extractor.get_all_answers(scan_np, 5)

        color = Color()
        bw_answers = color.all_ans_to_bw(answers)

        size = Size()
        DIM = (28, 28)
        shrunk_images = size.shrink_images(bw_answers, DIM)

        # convert the answer images to a single array, the same format used to train our model
        answers_flat = file_converter.convert_images(
            shrunk_images)  # returns each image as a (1, 28, 28, 1) float array

        # now that we have the answer images as black-and-white 1D NumPy arrays,
        # we can run them through our model and grade them;
        # first we need to load the model
        model_loader = ModelLoader()
        MODEL_JSON = 'models/modified_model_98.json'
        MODEL_WEIGHTS = 'models/modified_model_98.h5'
        model = model_loader.load_model_2(MODEL_JSON, MODEL_WEIGHTS)
        # compile the model
        model.compile(optimizer=RMSprop(lr=0.001),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        grader = Grader()
        answers = grader.get_answers(answers_flat, model)

        # get each image as a 784-character (28x28) string so we can store the data in a database
        ans_strings = file_converter.get_string_images(answers_flat)
        compressed_images = file_converter.compress_images(ans_strings)

        # add the images to the database so we can build a large dataset of handwritten letters
        # storage = Storage()
        # storage.insert(answers, compressed_images)

        return answers
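ModelLoader.load_model_2 is not shown above. A plausible sketch of a JSON-plus-weights loader using the standard Keras API, assuming the helper wraps model_from_json:

from tensorflow.keras.models import model_from_json

def load_model_from_json(json_path, weights_path):
    # Rebuild the architecture from JSON, then attach the trained weights.
    with open(json_path) as f:
        model = model_from_json(f.read())
    model.load_weights(weights_path)
    return model

model = load_model_from_json('models/modified_model_98.json',
                             'models/modified_model_98.h5')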
Example #4
    def default(self, url=None, fields=None):
        if not url or not fields:
            return dumps({'status': 'error', 'message': 'Need URI and fields'})
        extract = Extract(url, datapath, cachepath, extractorpath)
        data = extract.run()
        chart = GoogleChart(data)
        # Split the comma-separated field list and drop empty entries.
        nfield_arr = [field.strip() for field in fields.split(',') if field.strip()]
        if len(nfield_arr) == 1:
            url = chart.get_graph_url_for_field(nfield_arr[0])
        else:
            url = chart.get_graph_url_for_fields(nfield_arr)
        return dumps({'status': 'ok', 'url': url})
Example #5
from textblob import TextBlob
import en_core_web_sm


class Transform:
    def __init__(self, name):
        self.name = name
        self.extract = Extract(name)

    def get_sentiment(self, text):
        # Polarity above zero counts as positive; everything else as negative.
        sentimentBlob = TextBlob(text)
        if sentimentBlob.sentiment.polarity > 0:
            return 'Positive'
        else:
            return 'Negative'

    def get_stock_data(self):
        data = self.extract.get_stockprice_extract()
        stock_data = data.json()
        return stock_data

    def get_ner_dict(self, description):
        # Note: loading the spaCy model on every call is expensive;
        # it could be loaded once in __init__ and reused.
        spacy_nlp = en_core_web_sm.load()
        document = spacy_nlp(description)
        return [str(element) for element in document.ents]

    def get_news_data(self):
        data = self.extract.get_news_extract()
        for article in data['articles']:
            if article['description'] is not None:
                sentiment = self.get_sentiment(article['description'])
                article['sentiment'] = sentiment
                ner_tags = self.get_ner_dict(article['description'])
                article['ner_tags'] = ner_tags
            else:
                article['sentiment'] = 'None'
                article['ner_tags'] = 'None'

        return data
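The two NLP calls the class relies on can be exercised standalone. A sketch, assuming textblob and the en_core_web_sm spaCy model are installed (the sample sentence is made up); it loads the spaCy model once rather than on every call:

from textblob import TextBlob
import spacy

nlp = spacy.load('en_core_web_sm')  # load the model once, then reuse it

def analyse(text):
    # Polarity above zero counts as positive, as in get_sentiment above.
    polarity = TextBlob(text).sentiment.polarity
    entities = [ent.text for ent in nlp(text).ents]
    return polarity, entities

print(analyse('Apple shares rose after strong iPhone sales.'))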
Example #6
class Driver:
    if __name__ == '__main__':
        # Extracting data from 5 Excel files
        extract = Extract()
        ds, d, os, sa, ea = extract.getAllData()
        # Transforming data to obtain additional columns
        transform = Transform()
        ds, d = transform.transformAllData(ds, d)
        # Dropping unnecessary columns
        dropColumns = DropColumns()
        drivingSearch, delivery, orders, startAddresses, endAddresses = dropColumns.dropAllColumns(ds, d, os, sa, ea)
        # Joining the 5 tables
        merge = Merge()
        finalData = merge.mergeAllTables(drivingSearch, delivery, orders, startAddresses, endAddresses)
        # Converting the date columns from object type to datetime
        finalData = transform.transformdate(finalData)
        sqlload = SQL_Load()
        sqlload.loadDataToStaging(finalData)
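The helper classes wrap a conventional pandas extract-merge-load flow. A compressed, self-contained sketch of the same steps; the inline frames, join key, and connection string are placeholders:

import pandas as pd
from sqlalchemy import create_engine

# Inline frames stand in for the Excel files read by Extract.
orders = pd.DataFrame({'order_id': [1, 2], 'order_date': ['2021-01-01', '2021-01-02']})
delivery = pd.DataFrame({'order_id': [1, 2], 'status': ['delivered', 'pending']})

final_data = orders.merge(delivery, on='order_id')
# Convert the date column from object type to datetime, as transformdate does.
final_data['order_date'] = pd.to_datetime(final_data['order_date'])

# Load the merged table into a staging database.
engine = create_engine('sqlite:///staging.db')
final_data.to_sql('staging', engine, if_exists='replace', index=False)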
Example #7
def MakeMsg():
    global LastDeath
    global soup
    chdif = CheckNews()
    # Take only the entries that appeared since the last check.
    deathOfLapsus = soup.find_all(class_='page block celebrity blockList')[:chdif]
    listeOfDeathOfLapsus = []
    for death in deathOfLapsus:
        tempsys = {}
        bsfourElementTag = Extract(death)
        tempsys['name'] = bsfourElementTag.XtractName()
        tempsys['info'] = bsfourElementTag.XtractAreaLife()
        tempsys['text'] = bsfourElementTag.XtractText()
        tempsys['tombstone'] = bsfourElementTag.XtractDeathCause()
        listeOfDeathOfLapsus.append(tempsys)
    # Remember the title of the last processed entry for the next run.
    LastDeath = death.a.find('img', title=True)['title']
    return listeOfDeathOfLapsus
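A self-contained sketch of the two BeautifulSoup lookups the function depends on; the HTML is an invented stand-in for the scraped page:

from bs4 import BeautifulSoup

html = '<div class="page block celebrity blockList"><a href="#"><img title="Jane Doe"></a></div>'
soup = BeautifulSoup(html, 'html.parser')
# find_all can match the exact class attribute string, as MakeMsg does.
for block in soup.find_all(class_='page block celebrity blockList'):
    print(block.a.find('img', title=True)['title'])  # Jane Doe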
Example #8
        'Content-Type': "application/x-www-form-urlencoded",
        'Cookie': cookie,
        'Host': "zxgk.court.gov.cn",
        'Origin': "http://zxgk.court.gov.cn",
        'Referer': "http://zxgk.court.gov.cn/shixin/index_form.do",
        'Upgrade-Insecure-Requests': "1",
        'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36",
    }

    data = {
        'pName': info,
        'pCardNum': '',
        'pProvince': 0,
        'pCode': pCode,
        'captchaId': captchaId,
    }

    response = requests.post(findUrl, headers=headers, data=data)
    if_continue = Extract(response.text)
    # Merge the cookies returned in the response into the stored cookie string.
    cookie = Cook(cookie, response.cookies.get_dict())
    cookie = Rep(cookie)
    # Close the captcha image window.
    plt.close()
    print('Close captcha')
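findUrl, Cook, and Rep are defined elsewhere in this script. For comparison, a sketch of the same POST through requests.Session, which carries cookies across requests automatically; the endpoint and form values are placeholders:

import requests

session = requests.Session()  # cookies from each response are kept and resent
response = session.post(
    'http://zxgk.court.gov.cn/shixin/index_form.do',  # placeholder endpoint
    headers={'User-Agent': 'Mozilla/5.0'},
    data={'pName': 'name', 'pCardNum': '', 'pProvince': 0,
          'pCode': '1234', 'captchaId': 'abc'},
)
print(response.status_code)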
Example #9
    def default(self, url=None):
        if not url:
            return dumps({'status': 'error', 'message': 'Need URL'})
        extract = Extract(url, datapath, cachepath, extractorpath)
        data = extract.run()
        return dumps({'status': 'ok', 'data': data})
Example #10
            else:
                break

        logger.debug(
            "Scraping Module : Total Links for : " + str(keyword) + " is : " + str(len(current_advertisment_links)))

        # Get the search box element so its text can be cleared before the next keyword is entered
        search_box = wait.until(EC.element_to_be_clickable((By.ID, 'SearchKeyword')))
        time.sleep(2)
        search_box.clear()
    else:
        browser.close()
        browser.quit()

logger.debug("Scraping Module : Starting Data Scraping")
extract = Extract()
logger.debug("Scraping Module : Final Processing For All Advertisements In Progress")
current_timestamp = extract.extract_data(browser, advertisment_links, finalTimestamp_in_property, HandleProperties(),
                                         logger, province_name, city_name)
updated_date = (date.today() - timedelta(days=1)).strftime("%d/%m/%Y")

location_dictionary = openFile("r", "")
province_dictionary = location_dictionary["province_dict"]
city_dictionary = location_dictionary["city_dict"]
province_name = province_dictionary.get(province_argument)
cities_json = city_dictionary.get(province_argument)
city_json = cities_json.get(city_argument)
city_name = city_json.get("name")

if type_argument == "w":
    search_type = "Wanted"
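The wait-then-clear sequence above is Selenium's standard explicit-wait pattern. A minimal self-contained sketch, assuming a local Chrome driver; the URL and keyword are placeholders, and SearchKeyword is the ID used above:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

browser = webdriver.Chrome()
browser.get('https://example.com')  # placeholder URL
wait = WebDriverWait(browser, 10)
# Block until the element is clickable instead of sleeping a fixed time.
search_box = wait.until(EC.element_to_be_clickable((By.ID, 'SearchKeyword')))
search_box.clear()
search_box.send_keys('next keyword')
browser.quit()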
Example #11
import numpy as np
import sys, os

if len(sys.argv) != 3:
    print('Usage: python train_hog_human_detectory.py [database location] [mode]')
    print('mode: 0 - train, 1 - test')
    sys.exit(0)

if not os.path.isdir(sys.argv[1]):
    print('Directory does not exist:', sys.argv[1])
    sys.exit(0)

db = LoadDataSet(sys.argv[1])
cut = CutPicture(64.0, 128.0, 3)
hog = HOG(9, (6,6), (3,3))
ex = Extract(hog, cut)


if int(sys.argv[2]) == 0:
    print('Generating features...')
    pos, neg = db.loadTrainSet()

    lPos = 1178
    lNeg = 1359

    #X, y = ex.getSamples(pos, neg, lPos, lNeg)

    X, y = ex.getSamples(pos, neg)

    X = np.array(X).astype('float32')
    y = np.array(y).astype('float32')
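The HOG class here is custom. A comparable descriptor can be computed with scikit-image; a sketch that assumes HOG(9, (6, 6), (3, 3)) maps to orientations, cell size, and block size:

import numpy as np
from skimage.feature import hog

# A 128x64 window, the size CutPicture produces above.
image = np.random.rand(128, 64)
features = hog(image, orientations=9, pixels_per_cell=(6, 6),
               cells_per_block=(3, 3))
print(features.shape)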
Example #12
    def __init__(self, name):
        self.name = name
        self.extract = Extract(name)
Example #13
    def __init__(self, url: str, parameters: dict):
        Extract.__init__(self, url, parameters)
        # Collect every <script> tag via the base-class helper.
        self.scripts = Extract.get_from_tag(self, 'script')
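The explicit Extract.__init__(self, ...) call works, but super() is the idiomatic spelling under single inheritance. A self-contained sketch with stand-in classes:

class Base:
    def __init__(self, url, parameters):
        self.url = url
        self.parameters = parameters

    def get_from_tag(self, tag):
        # Stand-in for Extract.get_from_tag; returns a dummy list.
        return ['<' + tag + '>']

class ScriptExtract(Base):
    def __init__(self, url: str, parameters: dict):
        # super() replaces the explicit Base.__init__(self, ...) call.
        super().__init__(url, parameters)
        self.scripts = self.get_from_tag('script')

print(ScriptExtract('https://example.com', {}).scripts)  # ['<script>']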