def extractFile(tarball):
    """Extract the source file from *tarball* and derive its formatted name.

    Returns a (sourceFile, formattedFile) tuple, where formattedFile is the
    source name with a "formatted_" prefix.
    """
    # Parenthesized print: valid on both Python 2 and 3; the rest of this
    # file already uses the print() form.
    print("<DataLoader> Extracting file from: %s" % tarball)
    sourceFile = DataExtractor.extractData(tarball)
    formattedFile = "formatted_" + sourceFile
    return sourceFile, formattedFile
def extractFile(tarball):
    """Extract the source file from *tarball*.

    Returns (sourceFile, formattedFile): the extracted file name and the
    corresponding "formatted_"-prefixed name.
    """
    # print() form is compatible with both Python 2 and 3 (this file mixes
    # both print styles; the statement form is Python-2-only).
    print("<DataLoader> Extracting file from: %s" % tarball)
    sourceFile = DataExtractor.extractData(tarball)
    formattedFile = "formatted_" + sourceFile
    return sourceFile, formattedFile
def predictOutput(image="DATA/mask/15_h.jpg", weights='model_w_new_512.h5',
                  output_name='512result', channel=1, grey=False,
                  dim=(512, 512, 1), f_ratio=None):
    """Run the trained network over *image* and save raw + thresholded output.

    image       -- path of the input image to predict on
    weights     -- path of the saved model weights to load
    output_name -- basename for the output files (".jpg" and "_<r>_filtered.png")
    channel     -- channel passed through to the data extractor
    grey        -- greyscale flag passed through to the data extractor
    dim         -- (height, width, channels) tile shape for the network
    f_ratio     -- iterable of cut-off ratios for the filtered outputs;
                   defaults to [0.5]. (Previously a mutable default list —
                   replaced with a None sentinel to avoid shared state.)
    """
    if f_ratio is None:
        f_ratio = [0.5]
    model = net.NeuralNetwork()
    extractor = de.DataExtractor()
    model.load(dim, weights)
    # No mask is supplied (None): prediction only, no ground truth.
    data = extractor.extractData(image, None, channel=channel, shape=dim, grey=grey)
    shape = extractor.shape2(image, shape=dim)
    result = model.predictSet(data, shape, dim)
    io.imsave(output_name + '.jpg', result)
    for ratio in f_ratio:
        # NOTE(review): scipy.misc.toimage was removed in SciPy >= 1.2; this
        # requires an old SciPy or a port to PIL.Image.fromarray — confirm
        # the pinned SciPy version before upgrading.
        im = scipy.misc.toimage(extractor.cutOff(result, ratio))
        im.save(output_name + '_' + str(ratio) + '_filtered.png')
    # Predictions on 512x512 tiles can be large; reclaim memory eagerly.
    gc.collect()
def main():
    """Command-line entry point.

    Usage forms (argv):
      <prog>                         -> print help
      <prog> help [topic...]        -> print help for topics
      <prog> <config> configure ... -> configure BigQuery
      <prog> <config> load <src> .. -> load a file or every file in a directory
    """
    if len(sys.argv) == 1:
        helpFunc()
        return
    if sys.argv[1] == "help":
        helpFunc(sys.argv[2:])
        return
    if len(sys.argv) >= 3:
        configFile = sys.argv[1]
        action = sys.argv[2].lower()
        args = sys.argv[3:]
    else:
        # Not enough arguments for any action; nothing to do.
        return
    if action == "configure":
        BigQuery_Configuration.configureBigquery(configFile, args)
        return
    if action != "load":
        helpFunc()
        return
    # Get parameters: command-line params override configured defaults.
    defaults = BigQuery_Configuration.loadConfig(configFile)
    args, params = BigQuery_Configuration.loadParams(args)
    for key in defaults:
        if key not in params:
            params[key] = defaults[key]
    # Check source: a directory means "load every file inside it".
    source = args[0]
    try:
        if os.path.isdir(source):
            # print() form is Python-2/3 compatible; the statement form
            # used previously is Python-2-only.
            print("<DataLoader> Source is a directory")
            for tarball in [f for f in os.listdir(source)
                            if os.path.isfile(os.path.join(source, f))]:
                print("<DataLoader> Loading file: %s" % tarball)
                loadData(os.path.join(source, tarball), params)
        else:
            loadData(source, params)
        DataExtractor.cleanup()
    except Exception as e:
        # Top-level boundary: report the failure instead of crashing the CLI.
        print(e)
def learnNetwork(image="DATA/mask/05_h.jpg", mask="DATA/mask/05_h.tif",
                 mode='create', old_weights='model_w_new_128.h5',
                 new_weights='model_w_new_128.h5', epochs=1, channel=1,
                 gray=False, dim=(128, 128, 1)):
    """Train the network on one image/mask pair and save the weights.

    image       -- path of the training image
    mask        -- path of the matching mask/label image
    mode        -- 'create' to build a fresh model, 'learn' to continue
                   from *old_weights*; anything else exits the process
    old_weights -- weights file loaded when mode == 'learn'
    new_weights -- weights file written after training
    epochs      -- number of training epochs
    channel     -- channel passed to the data extractor
    gray        -- greyscale flag passed to the data extractor
    dim         -- (height, width, channels) tile shape for the network
    """
    model = net.NeuralNetwork()
    extractor = de.DataExtractor()

    # Either build a fresh model or resume from existing weights.
    if mode == 'create':
        model.create(dim)
    elif mode == 'learn':
        model.load(dim, old_weights)
    else:
        print('Bad mode. End of script...')
        sys.exit()

    training_set = extractor.extractData(image, mask, channel=channel,
                                         shape=dim, grey=gray)
    model.learn(training_set, epoch=epochs)
    model.save_weights(new_weights)

    # Drop the large training arrays and the model before returning.
    del training_set
    del model
    gc.collect()
import DataExtractor
import ConnectionManager

# Module-level wiring: create a single extractor instance and register it
# with the shared connection manager so incoming data is routed to it.
dataExtractor = DataExtractor.DataExtractor()
ConnectionManager.connManager.registerReceiver(dataExtractor)
#Avi Patel,1143213,Melbourne #Monit Patel,1135025,Melbourne #Prabha Choudhary,1098776,Melbourne #Shubham Parakh,1098807,Melbourne #-------------------------------- import plotly.graph_objs as go from app import * import DataExtractor as de year = [2017,2018,2019,2020] #Brisbane layout = go.Layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)',xaxis={'tickformat':'d'}) twitter_positive_coalition_b = go.Scatter(x=year,y=de.positive_coalition("Brisbane"), name = "Coalition") twitter_positive_labor_b = go.Scatter(x=year,y=de.positive_labor("Brisbane"), name="Labor") positive_b = [twitter_positive_coalition_b , twitter_positive_labor_b ] fig_positive_b = go.Figure(data=positive_b, layout=layout) fig_positive_b.update_layout(xaxis_title="Year",yaxis_title="Percentage") twitter_negative_coalition_b = go.Scatter(x=year,y=de.negative_coalition("Brisbane"), name = "Coalition") twitter_negative_labor_b = go.Scatter(x=year,y=de.negative_labor("Brisbane"), name = "Labor") negative_b = [twitter_negative_coalition_b,twitter_negative_labor_b] fig_negative_b = go.Figure(data=negative_b, layout=layout) fig_negative_b.update_layout(xaxis_title="Year",yaxis_title="Percentage") aurin_data_2016_b = go.Bar(x=[2016,2019],y=de.aurinPartyData2016("Brisbane"),name="Coalition") aurin_data_2019_b = go.Bar(x=[2016,2019],y=de.aurinPartyData2019("Brisbane"),name = "Labor")
""" import gc import numpy as np import network as net import DataExtractor as de from skimage import io dim = (8, 8, 1) if dim[2] == 1: g = True else: g = False model = net.NeuralNetwork() extractor = de.DataExtractor() trainTemp = extractor.extractData("DATA/mask/01_h.jpg", "DATA/mask/01_h.tif", shape=dim, grey=g) train = (np.reshape(trainTemp[0], (trainTemp[0].shape[0], dim[2], dim[0], dim[1])), trainTemp[1]) del trainTemp model.create(size=dim) model.learn(train, epoch=10) trainTemp = extractor.extractData("DATA/mask/02_h.jpg",
import sys
import pandas as pd

# Make the helper package under ./function importable.
sys.path.insert(0, "function/")
from DataExtractor import *
from RegClf import *

# Data extraction: train/test split of features (X) and targets (Y).
data = DataExtractor()
X_train, X_test, Y_train, Y_test = data.extract()

# Fit and score several regression models, then pick the most accurate.
regression = RegressionClassifier(X_train, X_test, Y_train, Y_test)
# Select which regression types to run — indices into RegClf's model
# list; TODO confirm the index-to-model mapping in RegClf.
regression.defineClassifierType([2, 3, 4, 5, 6])
regression.fit()
regression.predict()
regression.score()
bestReg = regression.mostAccurateScore()
# Plot the fitted models against the raw extracted data.
regression.plotGraph(data.getRawExtract())
#Team Members #Akshay Agarwal, 1141290,Melbourne #Avi Patel,1143213,Melbourne #Monit Patel,1135025,Melbourne #Prabha Choudhary,1098776,Melbourne #Shubham Parakh,1098807,Melbourne #-------------------------------- import requests import plotly.graph_objs as go import dash_html_components as html import dash_core_components as dcc import DataExtractor as de cities = ["Adelaide", "Brisbane", "Melbourne", "Perth", "Sydney"] colors1 = ["red", "orange", "blue", "green", "yellow"] unique_user = de.unique_user_cities() voters = de.voters_data() twitter = go.Pie(labels=cities, values=unique_user) aurin = go.Pie(labels=cities, values=voters) tab_2_layout = html.Div([ html.Div([ html.H3('Distribution of twitter users across cities', style={'textAlign': 'center'}), dcc.Graph(id='graph', figure={ 'data': [twitter], }, style={'padding-right': '1px'}) ],
from DataExtractor import *

# Build feature dictionaries for three extraction indices.
data = DataExtractor(11)
data10 = DataExtractor(10)
# data9 is constructed but its dictionary is not merged below; kept in case
# DataExtractor(9) has side effects callers rely on.
data9 = DataExtractor(9)

# print() form is Python-2/3 compatible (the previous statement form was
# Python-2-only); len(d) equals len(d.items()) without building the view.
print(len(data.featureDictionary))
print(len(data10.featureDictionary))
# Merge data10's features into data's dictionary and report the new size.
data.featureDictionary.update(data10.featureDictionary)
print(len(data.featureDictionary))
#Team Members #Akshay Agarwal, 1141290,Melbourne #Avi Patel,1143213,Melbourne #Monit Patel,1135025,Melbourne #Prabha Choudhary,1098776,Melbourne #Shubham Parakh,1098807,Melbourne #-------------------------------- import dash_html_components as html import dash_core_components as dcc import plotly.graph_objs as go import DataExtractor as de cities = ["Adelaide", "Brisbane", "Melbourne", "Perth", "Sydney"] age = ["Youth", "Adult", "Senior-citizens"] melbourne_count = de.age_data("Melbourne") sydney_count = de.age_data("Sydney") perth_count = de.age_data("Perth") adelaide_count = de.age_data("Adelaide") brisbane_count = de.age_data("Brisbane") unique_user = de.unique_user_cities() twitter = go.Pie(labels=cities, values=unique_user) youth_data = [ adelaide_count[0], brisbane_count[0], melbourne_count[0], perth_count[0], sydney_count[0] ] aurin_data = go.Bar(x=cities, y=youth_data) tab_3_layout = html.Div([
import learning, util, sys, DataExtractor from learning import * from DataExtractor import DataExtractor learner = StochasticGradientLearner(footballFeatureExtractor) data = list() for i in xrange(5, 13): data.append(DataExtractor(i).featureDictionary) train = dict() test = dict() for i in xrange(0, 4): train.update(data[i]) for i in xrange(4, 8): test.update(data[i]) from optparse import OptionParser parser = OptionParser() def default(str): return str + ' [Default: %default]' parser.add_option( '-f', '--featureExtractor',
import DataExtractor
import Setting
import CNNNetWork

# Load the raw .mat source data into a dict, then split out the training
# samples and their labels.
Dict = DataExtractor.ReadMatToDict(Setting.SourceData_dir)
TrainData = DataExtractor.GetTrainDSFromDict(Dict)
TrainL = DataExtractor.GetTrainLFromDict(Dict)

# Build the CNN on the extracted training set and start training.
OxfordCNN = CNNNetWork.CNN(TrainData, TrainL)
OxfordCNN.StartTrain()
'DataProcessing', # runs on multiple cpu 'DataShuffling', # runs on multiple cpu 'Learner', # runs on gpu 'BestPicks', ] if __name__ == '__main__': if 'PlayersListing' in to_execute: import PlayersListing PlayersListing.run(m) if 'DataDownloader' in to_execute: import DataDownloader DataDownloader.run(m) if 'DataExtractor' in to_execute: import DataExtractor DataExtractor.run(m, cpu) if 'RoleUpdater' in to_execute: import RoleUpdater RoleUpdater.run(m) if 'DataProcessing' in to_execute: import DataProcessing DataProcessing.run(m, cpu) if 'DataShuffling' in to_execute: import DataShuffling DataShuffling.run(m, shuffling_files, keep_for_testing, cpu) if 'Learner' in to_execute: import Learner Learner.run(m, n, restore) if 'BestPicks' in to_execute: import BestPicks BestPicks.run(m, n)
def __init__(self, filename, columns=None, background=False, outputDirectory=None, startColumn=None, saveImages=True):
    """Set up data extraction, output directories and the columns to plot.

    filename        -- data file handed to DataExtractor
    columns         -- explicit list of column names to plot (validated
                       against the available columns)
    background      -- plot 'background' columns instead of 'plot' columns
    outputDirectory -- results directory; defaults to
                       <basename>_results_<unix-time> under the CWD
    startColumn     -- plot all available columns starting from this one
    saveImages      -- also create <output>/plots for image output

    Exits the process (sys.exit) on any unrecoverable setup error.
    """
    self.dataExtractor = DataExtractor.DataExtractor(filename)
    self.plotDataWriter = PlotDataWriter.PlotDataWriter()
    self.saveImages = saveImages
    self.dataPlot = DataPlot.DataPlot(self.done, self.skip, self.dump, self.exit, self.prev, background)
    self.background = background
    availablePlotColumns = self.dataExtractor.getAvailableColumns('background' if background else 'plot')
    try:
        if outputDirectory is None:
            filename = os.path.basename(filename) + '_results_' + str(int(time.time()))
            self.outputDirectory = os.path.join(os.getcwd(), filename)
        else:
            self.outputDirectory = outputDirectory
        # Don't allow overwriting of existing directories.
        if os.path.exists(self.outputDirectory):
            print("ERROR: Output directory already exists. Choose another directory or remove the existing output directory before continuing.")
            sys.exit()
        # We just verified the directory does not exist, so create it
        # unconditionally (the original re-checked existence redundantly).
        os.makedirs(self.outputDirectory)
        # Add the image directory if it's required.
        if saveImages:
            self.imageDirectory = os.path.join(self.outputDirectory, 'plots')
            if not os.path.exists(self.imageDirectory):
                os.makedirs(self.imageDirectory)
        print("Files will be output to: " + self.outputDirectory)
    except Exception as e:
        print("There was a problem creating the output directories.")
        print(e)
        sys.exit()
    # Determine which columns to plot, in priority order:
    # explicit list -> suffix starting at startColumn -> all available.
    if columns is not None:
        errorColumns = [column for column in columns if column not in availablePlotColumns]
        if len(errorColumns) != 0:
            print("Fatal Error! these column(s) do not exist for this mode: " + ', '.join(x for x in errorColumns))
            sys.exit()
        self.columnsToPlot = columns
    elif startColumn is not None:
        try:
            idx = availablePlotColumns.index(startColumn)
            self.columnsToPlot = availablePlotColumns[idx:]
        except ValueError:
            # BUG FIX: previously columnsToPlot was left unset on this path
            # (bare except), crashing at the len() check below. Fall through
            # to the clean "No columns to plot" exit instead.
            print("Start column: " + startColumn + " was not found.")
            self.columnsToPlot = []
    else:
        self.columnsToPlot = availablePlotColumns
    if len(self.columnsToPlot) == 0:
        print("No columns to plot. Exiting.")
        sys.exit()
    self.currentPlotIndex = 0
    self.printGuide()
    self.plotNext()
def extractor(images, date):
    """Parse COVID report figures from OCR'd report *images* and persist
    them for *date* via DataExtractor.exportToFile.

    The OCR text is consumed section by section (tests, contact cases,
    community cases, per-city cases, recoveries, deaths); after each step
    the already-parsed prefix is sliced off.
    """
    # initialize empty text
    text = ''
    # Extract the text from the images (OCR).
    for image in images:
        text += DataExtractor.getText(image)
    text = text.lower()
    # Get the number of tests.
    tests = DataExtractor.getTests(text)
    # NOTE(review): every slice here also drops the final character (:-1) —
    # presumably harmless trailing junk; confirm against DataExtractor.
    text = text[tests["endIndex"]:-1]
    numbers = tests['numbers']
    tests_realises = numbers['tests']
    cas_positifs = numbers['positifs']
    # taux (positivity rate) is extracted but not used further in this function.
    taux = numbers['taux']
    #print(tests)
    # Contact cases.
    contacts = DataExtractor.getCasContact(text)
    text = text[contacts["endIndex"]:-1]
    cas_contact = contacts['number']
    #print(contacts)
    # Imported cases (currently disabled).
    #cas_importes = DataExtractor.getcasImportes(text)
    #print("cas_importes: ",cas_importes)
    # Community cases.
    cas_com = DataExtractor.getCasCom(text)
    text = text[cas_com["endIndex"]:-1]
    cas_coms = cas_com['number']
    #print(cas_com)
    # Truncate text to the listing of cases per city ("comme suit" is the
    # French marker phrase introducing that listing in the report).
    try:
        text = text[text.index("comme suit")+12:]
    except:
        try:
            text = text[text.index(":")+1:-1]
        except:
            print('Unable to Gather More Information!')
    #print(text)
    # Get the array of locations and their number of cases.
    cas = DataExtractor.getCityCases(text)
    text = text[cas["endIndex"]:-1]
    #print(cas)
    #print(DataExtractor.getNbGueris(text))
    # Recoveries and deaths, coerced to int.
    nombre_gueris = int(DataExtractor.getNbGueris(text))
    #nombre_gueris = DataExtractor.getNbGueris(text)
    #print("cas gueris ",nombre_gueris)
    nombre_deces = int(DataExtractor.getDeces(text))
    #print("deces: ",nombre_deces)
    # total (overall figures) is extracted but not used further here.
    total = DataExtractor.getOverall(text)
    #print(total)
    cases = DataExtractor.exportIntoJson(cas["cas"])
    #print(cases)
    DataExtractor.exportToFile(date,tests_realises,cas_positifs,cas_contact,cas_coms,nombre_gueris,nombre_deces,cases)