Example #1
    def __init__(self, dataSource, dataSet):
        # Dispatch to the Extract method that matches the source type.
        extractObj = Extract()
        if dataSource == 'api':
            self.data = extractObj.getAPISData(dataSet)
        elif dataSource == 'csv':
            self.data = extractObj.getCSVData(dataSet)
        else:
            self.data = extractObj.databases(dataSet)
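The if/elif chain above dispatches on the source keyword. The same pattern can be written as a lookup table; a self-contained sketch, where the loader functions are hypothetical stand-ins for the Extract methods:

def load_api(dataset):
    return 'api:' + dataset

def load_csv(dataset):
    return 'csv:' + dataset

def load_db(dataset):
    return 'db:' + dataset

# Unknown sources fall back to the database loader, mirroring the else branch.
HANDLERS = {'api': load_api, 'csv': load_csv}

def load(source, dataset):
    return HANDLERS.get(source, load_db)(dataset)

print(load('csv', 'sales.csv'))  # csv:sales.csv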
Example #2
    def default(self, url1=None, url2=None, field=None):
        if not url1 or not url2 or not field:
            return dumps({'status': 'error', 'message': 'Need URIs and field'})
        extract1 = Extract(url1, datapath, cachepath, extractorpath)
        extract2 = Extract(url2, datapath, cachepath, extractorpath)
        data1 = extract1.run()
        data2 = extract2.run()
        chart = ComparisonChart(url1, url2, data1, data2)
        url = chart.get_graph_url_for_field(field)
        return dumps({'status': 'ok', 'url': url})
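Examples #2, #4, and #9 share one shape: a default handler that validates its query parameters and returns a JSON string. The signature suggests CherryPy's default dispatcher; a minimal runnable sketch under that assumption (the class name and payload are illustrative):

import cherrypy
from json import dumps

class Api:
    @cherrypy.expose
    def default(self, url=None):
        # Validate the query parameter before doing any work.
        if not url:
            return dumps({'status': 'error', 'message': 'Need URL'})
        # A real handler would run an Extract pipeline here.
        return dumps({'status': 'ok', 'data': {'url': url}})

if __name__ == '__main__':
    cherrypy.quickstart(Api())  # default() receives unmatched paths' query params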
Example #3
    def process_image(self, image):
        file_converter = FileConverter()
        # convert the image to a byte string
        image_bytes = file_converter.png_to_jpeg(image)

        scanner = Scanner()
        # scan the image and warp it to a bird's-eye view; returns a NumPy array of the pixels that make up the image
        scan_np = scanner.scan(image_bytes)

        # extract the individual answers from the scanned test
        extractor = Extract()
        answers = extractor.get_all_answers(scan_np, 5)

        color = Color()
        bw_answers = color.all_ans_to_bw(answers)

        size = Size()
        DIM = (28, 28)
        shrunk_images = size.shrink_images(bw_answers, DIM)

        # convert the answer images to a single array, the same format used to train our model
        answers_flat = file_converter.convert_images(
            shrunk_images)  # returns each image as a (1, 28, 28, 1) float array

        # now that we have the answer images as black-and-white 1D NumPy arrays,
        # we can run them through our model and grade them;
        # first we need to load the model
        model_loader = ModelLoader()
        MODEL_JSON = 'models/modified_model_98.json'
        MODEL_WEIGHTS = 'models/modified_model_98.h5'
        model = model_loader.load_model_2(MODEL_JSON, MODEL_WEIGHTS)
        # compile the model
        model.compile(optimizer=RMSprop(lr=0.001),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        grader = Grader()
        answers = grader.get_answers(answers_flat, model)

        # get each image as a 784-character (28x28) string so we can store the data in a database
        ans_strings = file_converter.get_string_images(answers_flat)
        compressed_images = file_converter.compress_images(ans_strings)

        # add the images to the database so we can build a large dataset of handwritten letters
        # storage = Storage()
        # storage.insert(answers, compressed_images)

        return answers
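ModelLoader.load_model_2 is not shown above. A plausible sketch of a JSON-plus-weights loader using the standard Keras API, assuming the helper wraps model_from_json:

from tensorflow.keras.models import model_from_json

def load_model_from_json(json_path, weights_path):
    # Rebuild the architecture from JSON, then attach the trained weights.
    with open(json_path) as f:
        model = model_from_json(f.read())
    model.load_weights(weights_path)
    return model

model = load_model_from_json('models/modified_model_98.json',
                             'models/modified_model_98.h5')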
Example #4
    def default(self, url=None, fields=None):
        if not url or not fields:
            return dumps({'status': 'error', 'message': 'Need URI and fields'})
        extract = Extract(url, datapath, cachepath, extractorpath)
        data = extract.run()
        chart = GoogleChart(data)
        # Split the comma-separated field list and drop empty entries.
        nfield_arr = [field.strip() for field in fields.split(',') if field.strip()]
        if len(nfield_arr) == 1:
            url = chart.get_graph_url_for_field(nfield_arr[0])
        else:
            url = chart.get_graph_url_for_fields(nfield_arr)
        return dumps({'status': 'ok', 'url': url})
Example #5
from textblob import TextBlob
import en_core_web_sm


class Transform:
    def __init__(self, name):
        self.name = name
        self.extract = Extract(name)

    def get_sentiment(self, text):
        # Polarity above zero counts as positive; everything else as negative.
        sentimentBlob = TextBlob(text)
        if sentimentBlob.sentiment.polarity > 0:
            return 'Positive'
        else:
            return 'Negative'

    def get_stock_data(self):
        data = self.extract.get_stockprice_extract()
        stock_data = data.json()
        return stock_data

    def get_ner_dict(self, description):
        # Note: loading the spaCy model on every call is expensive;
        # it could be loaded once in __init__ and reused.
        spacy_nlp = en_core_web_sm.load()
        document = spacy_nlp(description)
        return [str(element) for element in document.ents]

    def get_news_data(self):
        data = self.extract.get_news_extract()
        for article in data['articles']:
            if article['description'] is not None:
                sentiment = self.get_sentiment(article['description'])
                article['sentiment'] = sentiment
                ner_tags = self.get_ner_dict(article['description'])
                article['ner_tags'] = ner_tags
            else:
                article['sentiment'] = 'None'
                article['ner_tags'] = 'None'

        return data
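The two NLP calls the class relies on can be exercised standalone. A sketch, assuming textblob and the en_core_web_sm spaCy model are installed (the sample sentence is made up); it loads the spaCy model once rather than on every call:

from textblob import TextBlob
import spacy

nlp = spacy.load('en_core_web_sm')  # load the model once, then reuse it

def analyse(text):
    # Polarity above zero counts as positive, as in get_sentiment above.
    polarity = TextBlob(text).sentiment.polarity
    entities = [ent.text for ent in nlp(text).ents]
    return polarity, entities

print(analyse('Apple shares rose after strong iPhone sales.'))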
Example #6
class Driver:
    if __name__ == '__main__':
        # Extracting data from 5 Excel files
        extract = Extract()
        ds, d, os, sa, ea = extract.getAllData()
        # Transforming data to obtain additional columns
        transform = Transform()
        ds, d = transform.transformAllData(ds, d)
        # Dropping unnecessary columns
        dropColumns = DropColumns()
        drivingSearch, delivery, orders, startAddresses, endAddresses = dropColumns.dropAllColumns(ds, d, os, sa, ea)
        # Joining the 5 tables
        merge = Merge()
        finalData = merge.mergeAllTables(drivingSearch, delivery, orders, startAddresses, endAddresses)
        # Converting the date columns from object type to datetime
        finalData = transform.transformdate(finalData)
        sqlload = SQL_Load()
        sqlload.loadDataToStaging(finalData)
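The helper classes wrap a conventional pandas extract-merge-load flow. A compressed, self-contained sketch of the same steps; the inline frames, join key, and connection string are placeholders:

import pandas as pd
from sqlalchemy import create_engine

# Inline frames stand in for the Excel files read by Extract.
orders = pd.DataFrame({'order_id': [1, 2], 'order_date': ['2021-01-01', '2021-01-02']})
delivery = pd.DataFrame({'order_id': [1, 2], 'status': ['delivered', 'pending']})

final_data = orders.merge(delivery, on='order_id')
# Convert the date column from object type to datetime, as transformdate does.
final_data['order_date'] = pd.to_datetime(final_data['order_date'])

# Load the merged table into a staging database.
engine = create_engine('sqlite:///staging.db')
final_data.to_sql('staging', engine, if_exists='replace', index=False)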
Example #7
def MakeMsg():
    global LastDeath
    global soup
    chdif = CheckNews()
    # Take only the entries that appeared since the last check.
    deathOfLapsus = soup.find_all(class_='page block celebrity blockList')[:chdif]
    listeOfDeathOfLapsus = []
    for death in deathOfLapsus:
        tempsys = {}
        bsfourElementTag = Extract(death)
        tempsys['name'] = bsfourElementTag.XtractName()
        tempsys['info'] = bsfourElementTag.XtractAreaLife()
        tempsys['text'] = bsfourElementTag.XtractText()
        tempsys['tombstone'] = bsfourElementTag.XtractDeathCause()
        listeOfDeathOfLapsus.append(tempsys)
    # Remember the title of the last processed entry for the next run.
    LastDeath = death.a.find('img', title=True)['title']
    return listeOfDeathOfLapsus
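A self-contained sketch of the two BeautifulSoup lookups the function depends on; the HTML is an invented stand-in for the scraped page:

from bs4 import BeautifulSoup

html = '<div class="page block celebrity blockList"><a href="#"><img title="Jane Doe"></a></div>'
soup = BeautifulSoup(html, 'html.parser')
# find_all can match the exact class attribute string, as MakeMsg does.
for block in soup.find_all(class_='page block celebrity blockList'):
    print(block.a.find('img', title=True)['title'])  # Jane Doe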
Example #8
        'Content-Type': "application/x-www-form-urlencoded",
        'Cookie': cookie,
        'Host': "zxgk.court.gov.cn",
        'Origin': "http://zxgk.court.gov.cn",
        'Referer': "http://zxgk.court.gov.cn/shixin/index_form.do",
        'Upgrade-Insecure-Requests': "1",
        'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36",
    }

    data = {
        'pName': info,
        'pCardNum': '',
        'pProvince': 0,
        'pCode': pCode,
        'captchaId': captchaId,
    }

    response = requests.post(findUrl, headers=headers, data=data)
    if_continue = Extract(response.text)
    # Merge the cookies returned in the response into the stored cookie string.
    cookie = Cook(cookie, response.cookies.get_dict())
    cookie = Rep(cookie)
    # Close the captcha image window.
    plt.close()
    print('Close captcha')
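findUrl, Cook, and Rep are defined elsewhere in this script. For comparison, a sketch of the same POST through requests.Session, which carries cookies across requests automatically; the endpoint and form values are placeholders:

import requests

session = requests.Session()  # cookies from each response are kept and resent
response = session.post(
    'http://zxgk.court.gov.cn/shixin/index_form.do',  # placeholder endpoint
    headers={'User-Agent': 'Mozilla/5.0'},
    data={'pName': 'name', 'pCardNum': '', 'pProvince': 0,
          'pCode': '1234', 'captchaId': 'abc'},
)
print(response.status_code)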
Example #9
    def default(self, url=None):
        if not url:
            return dumps({'status': 'error', 'message': 'Need URL'})
        extract = Extract(url, datapath, cachepath, extractorpath)
        data = extract.run()
        return dumps({'status': 'ok', 'data': data})
Example #10
            else:
                break

        logger.debug(
            "Scraping Module : Total Links for : " + str(keyword) + " is : " + str(len(current_advertisment_links)))

        # Get the search box element so its text can be cleared before the next keyword is entered
        search_box = wait.until(EC.element_to_be_clickable((By.ID, 'SearchKeyword')))
        time.sleep(2)
        search_box.clear()
    else:
        browser.close()
        browser.quit()

logger.debug("Scraping Module : Starting Data Scraping")
extract = Extract()
logger.debug("Scraping Module : Final Processing For All Advertisements In Progress")
current_timestamp = extract.extract_data(browser, advertisment_links, finalTimestamp_in_property, HandleProperties(),
                                         logger, province_name, city_name)
updated_date = (date.today() - timedelta(days=1)).strftime("%d/%m/%Y")

location_dictionary = openFile("r", "")
province_dictionary = location_dictionary["province_dict"]
city_dictionary = location_dictionary["city_dict"]
province_name = province_dictionary.get(province_argument)
cities_json = city_dictionary.get(province_argument)
city_json = cities_json.get(city_argument)
city_name = city_json.get("name")

if type_argument == "w":
    search_type = "Wanted"
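The wait-then-clear sequence above is Selenium's standard explicit-wait pattern. A minimal self-contained sketch, assuming a local Chrome driver; the URL and keyword are placeholders, and SearchKeyword is the ID used above:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

browser = webdriver.Chrome()
browser.get('https://example.com')  # placeholder URL
wait = WebDriverWait(browser, 10)
# Block until the element is clickable instead of sleeping a fixed time.
search_box = wait.until(EC.element_to_be_clickable((By.ID, 'SearchKeyword')))
search_box.clear()
search_box.send_keys('next keyword')
browser.quit()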
Example #11
import numpy as np
import sys, os

if len(sys.argv) != 3:
    print('Usage: python train_hog_human_detectory.py [database location] [mode]')
    print('mode: 0 - train, 1 - test')
    sys.exit(0)

if not os.path.isdir(sys.argv[1]):
    print('Directory does not exist:', sys.argv[1])
    sys.exit(0)

db = LoadDataSet(sys.argv[1])
cut = CutPicture(64.0, 128.0, 3)
hog = HOG(9, (6,6), (3,3))
ex = Extract(hog, cut)


if int(sys.argv[2]) == 0:
    print('Generating features...')
    pos, neg = db.loadTrainSet()

    lPos = 1178
    lNeg = 1359

    #X, y = ex.getSamples(pos, neg, lPos, lNeg)

    X, y = ex.getSamples(pos, neg)

    X = np.array(X).astype('float32')
    y = np.array(y).astype('float32')
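The HOG class here is custom. A comparable descriptor can be computed with scikit-image; a sketch that assumes HOG(9, (6, 6), (3, 3)) maps to orientations, cell size, and block size:

import numpy as np
from skimage.feature import hog

# A 128x64 window, the size CutPicture produces above.
image = np.random.rand(128, 64)
features = hog(image, orientations=9, pixels_per_cell=(6, 6),
               cells_per_block=(3, 3))
print(features.shape)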
Example #12
    def __init__(self, name):
        self.name = name
        self.extract = Extract(name)
Example #13
    def __init__(self, url: str, parameters: dict):
        Extract.__init__(self, url, parameters)
        # Collect every <script> tag via the base-class helper.
        self.scripts = Extract.get_from_tag(self, 'script')
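The explicit Extract.__init__(self, ...) call works, but super() is the idiomatic spelling under single inheritance. A self-contained sketch with stand-in classes:

class Base:
    def __init__(self, url, parameters):
        self.url = url
        self.parameters = parameters

    def get_from_tag(self, tag):
        # Stand-in for Extract.get_from_tag; returns a dummy list.
        return ['<' + tag + '>']

class ScriptExtract(Base):
    def __init__(self, url: str, parameters: dict):
        # super() replaces the explicit Base.__init__(self, ...) call.
        super().__init__(url, parameters)
        self.scripts = self.get_from_tag('script')

print(ScriptExtract('https://example.com', {}).scripts)  # ['<script>']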