Example #1
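 # Build one API call per generated query, parse each response with DataExtractor,
 # and collect the maximum precipitation value reported for each file.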
 def getData(self):
     file_names, params = self.gen_query(self.lat, self.lon, self.date)
     file_names.reverse()
     params.reverse()
     # print("F: " ,file_names)
     weather = []
     for file_name, param in zip(file_names, params):
         api_call = self.api_format + param
         resp = requests.get(api_call)
         d = DataExtractor(self.oID, resp.content)
         # print(d.maxPrecip(), ":", resp.content)
         # print(d.maxPrecip(), end = ' ')
         weather.append(str(d.maxPrecip()))
         # weather.extend([d.maxPrecip(), d.maxTemp(),    d.maxAirPressure(), d.maxHumidity(), d.maxWind()])
     # print()
     return weather
Example #2
 def extract_class_data(self, raw_data):
     extracted_data = []
     for a_class in raw_data:
         a_class_data = DataExtractor(a_class)
         if a_class_data.class_name is not None:
             extracted_data.append(a_class_data)
     return extracted_data
Example #3
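    # Decode one Softstrip image: parse the header, extract the rows with either the
    # CNN-based or the algorithmic extractor, decode them, then extract the payload
    # and report whether its checksum is valid.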
    def decode(self, img, first_strip=False):
        softstrip_matrix = SoftstripMatrix(img, self.gray_img)
        header_extractor = HeaderExtractor(softstrip_matrix)
        header_extractor.parse_header()
        vertical_sync_start = header_extractor.vertical_sync_start
        self.bits_count = header_extractor.get_bits_per_row()

        if self.config['row_extractor'] == CNN_ROW_EXTRACTOR:
            row_extractor = CnnRowExtractor(softstrip_matrix.grayscale_matrix, softstrip_matrix.binary_matrix, self.bits_count)
            gray_grouped_matrix, grouped_matrix = row_extractor.extract_rows()
        else:
            row_extractor = AlgorithmicRowExtractor(softstrip_matrix, self.bits_count)
            grouped_matrix, gray_grouped_matrix = row_extractor.extract_rows()

        if self.config['row_decoder'] == CNN_ROW_DECODER:
            row_decoder = CnnRowDecoder(gray_grouped_matrix, self.start_time, self.bits_count, self.config['timeout'], vertical_sync_start)
            reduced_pixel_matrix = row_decoder.decode_rows()
        else:
            row_decoder = AlgorithmicRowDecoder(grouped_matrix, self.bits_count, self.start_time, self.config['timeout'])
            reduced_pixel_matrix = row_decoder.decode_rows()

        if len(reduced_pixel_matrix) == 0:
            print('[ERROR] ' + self.path + ' is invalid!')
        else:
            data_extractor = DataExtractor(self.config['timeout'])
            data_extractor.extract_data(reduced_pixel_matrix, first_strip, self.start_time)

            self.data += data_extractor.data

            
            if data_extractor.valid:
                print('Checksum valid!')
                if first_strip:
                    self.strip_meta_info = data_extractor.file_header
                    print(self.strip_meta_info)
            else:
                print('Checksum invalid!')
Example #4
    aws_client = AWSClient(aws_access_key_id=aws_access_key_id,
                           aws_secret_access_key=aws_secret_access_key)

    #Constants for Data Extraction
    LOGIN_URL = 'https://www.lendingclub.com/auth/login'
    DOWNLOAD_URL = 'https://www.lendingclub.com/info/download-data.action'
    DIR_PATH = r'Data\DOWNLOAD_LOAN_DATA'

    #Parameters for lending club data scraping
    fileTag = "loanStatsFileNamesJS"
    # email=  os.environ['LENDING_CLUB_EMAIL']
    # password= os.environ['LENDING_CLUB_PASSWORD']

    print('Downloading Files...')
    #Extract Data from Lending Club URL
    de = DataExtractor(email, password)
    de.extractData(LOGIN_URL=LOGIN_URL,
                   DOWNLOAD_URL=DOWNLOAD_URL,
                   fileTag=fileTag)

    print('Ingesting Data...')
    #Ingest Data into Pipeline
    di = DataIngestor(aws_client)

    #Create Landing and Processed Buckets
    LANDING_BUCKET = 'lending-club-landing-data'
    PROCESSED_BUCKET = 'lending-club-processed-data'

    print('Creating Buckets...')
    di.createS3Bucket(LANDING_BUCKET)
    di.createS3Bucket(PROCESSED_BUCKET)
Example #5
def get_arguments(argument_list):
    short_options = "d:"
    long_options = ["document="]
    try:
        document_file = ''
        arguments, values = getopt.getopt(argument_list, short_options,
                                          long_options)
        print(arguments)
        if len(arguments) < 1:
            print("Invalid arguments")
            sys.exit(2)
        for t in arguments:
            if t[0] in ("-d", "--document"):
                document_file = t[1]
                print(document_file)
        return document_file
    except getopt.error as err:
        print(str(err))
        sys.exit(2)


if __name__ == '__main__':
    wiki_10_file = get_arguments(sys.argv[1:])
    inverted_index = InvertedIndex()
    data_extractor = DataExtractor()
    structure_file_name = data_extractor.extract_data(wiki_10_file)
    index_file, vector_file = inverted_index.build_term_index(
        structure_file_name, wiki_10_file)
    print("Index file name: ", index_file)
    print("Vector file name: ", vector_file)
Example #6
from DataExtractor import DataExtractor
from TextProcessing import TextProcessing
from TextClassifier import TextClassifier
import pandas as pd
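# Load the training and test sets, hold out a validation split, clean the text, then
# fit the classifier, evaluate it on the validation data, and write the test
# predictions to output.csv.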

train, target = DataExtractor('data.csv').data_producer('train')
test = DataExtractor('test.csv').data_producer('test')

text_classifier = TextClassifier()

(x_train, y_train), (x_validation,
                     y_validation) = text_classifier.split_validation_data(
                         train, target)

cleaned_x_train = TextProcessing(x_train).clean_text()
cleaned_x_validation = TextProcessing(x_validation).clean_text()
cleaned_x_test = TextProcessing(test).clean_text()

text_classifier.fit(cleaned_x_train, y_train)

text_classifier.evaluate(cleaned_x_validation, y_validation)

text_classifier.confusion(y_validation, cleaned_x_validation)

result = text_classifier.predict(cleaned_x_test)
pd.DataFrame(result, columns=['category']).to_csv('output.csv',
                                                  index=True,
                                                  index_label='index')
Example #7
def main():

    total_start_time = time.time()

    # ------------------------------------------------------------------------ #
    # 0. PARSE INPUT ARGUMENTS
    # ------------------------------------------------------------------------ #

    data_file_name = "Crime_Weather_Cleaned_2017.csv"
    # data_file_name = "Crime20161718.csv"

    data_file_path = os.path.join(FOLDER_PATH, data_file_name)

    # ------------------------------------------------------------------------ #
    # 1. ESTABLISH DATABASE CONNECTION
    # ------------------------------------------------------------------------ #

    print("\n\n\t\t **** 1. DATABASE CONNECTION **** ")

    # host = 'localhost'
    # database = 'crime_star'
    # user = '******'
    # password = '******'
    # port = '3306'

    port = '3306'

    data_loader = DataLoader()

    ret = data_loader.connect(host=DB_IP,
                              database=DB,
                              user=DB_UNAME,
                              password=DB_PWD,
                              port=port)

    if ret != 1:
        print(" Connection not established. Try again")
        print(" Check internet connectivity")
        return ret

    # ------------------------------------------------------------------------ #
    # 2. DATA EXTRACTION PHASE
    # ------------------------------------------------------------------------ #

    print("\n\n\t\t **** 2. DATA EXTRACTION **** ")

    data_extractor = DataExtractor()
    data_frame = data_extractor.read_csv(fpath=data_file_path,
                                         nrows_to_read=-1)

    # ------------------------------------------------------------------------ #
    # 3. DATA LOADING PHASE
    # ------------------------------------------------------------------------ #

    print("\n\n\t\t **** 2. DATA LOADING **** ")

    ret = data_loader.load_full_table(data_frame, table_name=RAW_TABLE_NAME)

    if ret == -1:
        print(" Could not upload to database ")
        data_loader.disconnect()
        return

    print("Successfully populated database")

    # ------------------------------------------------------------------------ #
    # 4. DISCONNECT THE DATABASE AND CLEAN UP MEMORY
    # ------------------------------------------------------------------------ #

    data_loader.disconnect()

    # ------------------------------------------------------------------------ #
    # 5. SEND A MESSAGE TO THE DATA HUB AS AN UPDATE
    # ------------------------------------------------------------------------ #

    print(" Sending message to data hub for update....", end="")

    messenger = Messenger()
    # Connect to the data hub
    messenger.connect(host=DATA_HUB_IP, uname=DATA_HUB_UNAME, pwd=DATA_HUB_PWD)

    # Connect to the exchange
    messenger.connect_to_exchange(ex_name=EX_NAME)

    # Send update
    message = "Database updated with latest rows"
    messenger.send_message_to_exchange(ex_name=EX_NAME,
                                       message=message,
                                       topic=TOPIC)

    print("sent")

    total_end_time = time.time()

    print(" Total time taken :", total_end_time - total_start_time)
Example #8
#!/usr/bin/python

import sys
from DataExtractor import DataExtractor

deExtractor = DataExtractor()
deExtractor.extractData(sys.argv[1])
            "dim_date": dim_date,
            "dim_weather": dim_weather,
            "dim_location": dim_location,
            "dim_crime": dim_crime,
        }
        return star_tables


if __name__ == "__main__":

    folder_path = "C:\\Users\\SSrih\\OneDrive\\UChicago\\DEP\\Project\\data" \
                  "\\Crime and Weather\\"
    # data_file_name = "CrimeWeather2010.csv"
    # data_file_name = "Crime2010Raw.csv"
    data_file_name = "Crime20161718.csv"

    data_file_path = os.path.join(folder_path, data_file_name)
    data_extractor = DataExtractor()
    data_frame = data_extractor.read_csv(fpath=data_file_path,
                                         nrows_to_read=5000)

    # print(data_frame.head())

    data_worker = DataWorker()

    print(data_frame.isnull().sum().sum())

    data_worker.process_pipeline(data_frame)

    print(data_frame.isnull().sum().sum())
Example #10
#!/usr/bin/python

import sys
from DataExtractor import DataExtractor


deExtractor = DataExtractor()
deExtractor.extractData(sys.argv[1])
Example #11
 def run(self):
     try:
         logInfo(self.userId, 'start crawling')
         de = DataExtractor()
         de.getServerId(self.userId)
         de.getWeightHistory(self.userId)
         de.getDietHistory(self.userId)
         de.getGroup(self.userId)
         de.getChallenge(self.userId)
         de.getBuddy(self.userId)
         logInfo(self.userId, 'Done crawling')
     except Exception as e:
         logException(self.userId, self.run.__name__, e)
Example #12
from DataExtractor import DataExtractor
from Geolocation import Location
from DataFilter import DataFilter

import json

dataExObj = DataExtractor(dir='./dataset')
allFiles = dataExObj.FileList(ext='.json')

jsonFileDict = dataExObj.parse(allFiles)

#Data Filtering

#Step 1. Join reviews and business by business id's
datafilter = DataFilter()
data = datafilter.JoinByAttribute(jsonFileDict['business'],
                                  jsonFileDict['review'], 'business_id')

#Step 2: Get the location.

loc = Location()

for key, value in data.items():
    latitude = data[key]['latitude']
    longitude = data[key]['longitude']
    location = loc.GetLocation(latitude, longitude)
    data[key] = datafilter.merge_dicts(data[key], {'location': location})
    print(data[key]['location'])

#TODO Step 3
#Step 3: Filter out locations outside the United States.
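# The TODO above leaves Step 3 unimplemented. A minimal, hypothetical sketch of that
# filtering step, assuming each merged 'location' value is (or stringifies to) text
# that contains the country name:
us_data = {
    key: value
    for key, value in data.items()
    if 'United States' in str(value.get('location', ''))
}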
Example #13
from DataExtractor import DataExtractor

if __name__ == '__main__':
    Extractor = DataExtractor()
    R = Extractor.Extract("realDonaldTrump")
    print(R)
Example #14
import os
from DataExtractor import DataExtractor
from StridesIdentifier import StridesIdentifier
from FeaturesExtractor import FeaturesExtractor
from ClassifiersEvaluator import ClassifiersEvaluator
from Train import Train
from InfluxCalculator import InfluxCalculator
from Classifier import Classifier

models_dir = '../models'
# if the trained models folder is empty
if len(os.listdir(models_dir)) == 0:
    # train phase
    #function to extract frames from videos and to create txt files with data of interest from each video
    de = DataExtractor('train')
    videos_list = de.extractor()
    #function to identify strides from the data (in txt files) for each video and to store the stride information in txt files
    si = StridesIdentifier(videos_list)
    si.identifier()
    #function to extract all features to analyze for each person
    fe = FeaturesExtractor(videos_list)
    people_list = fe.extractor()
    #function to evaluate classifiers
    ce = ClassifiersEvaluator(people_list)
    ce.evaluator()
    #function to train classifiers
    t = Train(people_list)
    t.training()
#test phase
#function to extract frames from videos and to create txt files with data of interest from each video
de = DataExtractor('test')
Example #15
# Clock to time the running of the program
start_time = time.time()

# Setting up directory
mypath = os.getcwd() + '/data/seminar_testdata/test_untagged/'
trainingPath = 'data/training'
directory = os.fsencode(mypath)

# Runs the ontology classification
print('Running Ontology Classification: \n')
ontology = Ontology()
ontology.run(mypath)

# Begins tagging
print("\nTagging progress beginning. Get a brew, it'll take a while... \n")
extractor = DataExtractor()

# Trains our model
extractor.train(trainingPath)
tagger = Tagger()

# Tags all emails in the directory given
tagger.tag_seminar(mypath, directory, extractor)

# Calculates how long the program took
seconds = time.time() - start_time
m, s = divmod(seconds, 60)
print("The program has been running for {0} minutes and {1} seconds \n".format(
    round(m), round(s)))

# Evaluates results
Example #16
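# Aggregate feature dictionaries from DataExtractor(5) through DataExtractor(9) for
# training and from DataExtractor(10) through DataExtractor(12) for testing, scale
# the training features, and fit an SVM classifier.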
from sklearn.linear_model import SGDClassifier
from sklearn import svm, preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from DataExtractor import DataExtractor

trainExtractor = DataExtractor(5)
trainData = trainExtractor.featureDictionary
for i in range(6, 10):
    trainData.update(DataExtractor(i).featureDictionary)

trainInput = list()
trainOutput = list()

for value in trainData.values():
    inputData = value[0]
    trainInput.append(list(inputData[0].values()) + list(inputData[1].values()))
    trainOutput.append(1 if value[1] == 1 else 0)

#clf = SGDClassifier(loss="log", penalty="elasticnet")
#clf = SGDClassifier(loss="hinge")
clf = svm.SVC()
#clf = GradientBoostingClassifier(n_estimators=30, max_depth=3, subsample=.7)
#clf = KNeighborsClassifier(n_neighbors=3)
scaledTrainInput = preprocessing.scale(trainInput)
clf.fit(scaledTrainInput, trainOutput)

testExtractor = DataExtractor(10)
testData = testExtractor.featureDictionary
for i in range(11, 13):
    testData.update(DataExtractor(i).featureDictionary)
Example #17
 def __init__(self, filepath):
     # self.x_train, self.y_train = DataExtractor(filepath).split_labels()
     self.x_train, self.x_test, self.y_train, self.y_test = DataExtractor(
         filepath).split_validation()
Example #18
from DataExtractor import DataExtractor
import sys
username = sys.argv[1]
KEY = sys.argv[2]
SECRET = sys.argv[3]
filters = ""
Extractor = DataExtractor()
Extractor.Extract(username, filters, KEY, SECRET)
Example #19
from DataExtractor import DataExtractor

Extractor = DataExtractor()
Extractor.Extract()