def mutual_info(x, y):
    """Return the normalized mutual information gain between ``x`` and ``y``.

    ``x`` is first passed through ``discretize_feature``. The marginal
    distributions are obtained from ``pd`` and the joint distribution from
    ``pD``; they are indexed by value (``p[v]``) and by value pair
    (``p[(vx, vy)]``) respectively.
    NOTE(review): presumably both helpers return probability mappings --
    confirm against their definitions.

    Returns:
        ``(H(X) + H(Y) - H(X, Y)) / (H(X) + H(Y))`` with entropies in bits,
        or ``0.0`` when ``H(X) + H(Y)`` is zero (the original expression
        divided by zero in that case).
    """
    import numpy as np

    x = discretize_feature(x)
    values_x = np.unique(x)
    values_y = np.unique(y)
    p_x = pd(x)
    p_y = pd(y)
    p_xy = pD(x, y)

    def _entropy(probabilities):
        # Zero-probability outcomes are skipped: they contribute nothing and
        # log2(0) is undefined.
        total = 0.0
        for p in probabilities:
            if p != 0:
                total -= p * np.log2(p)
        return total

    entropy_x = _entropy(p_x[vx] for vx in values_x)
    entropy_y = _entropy(p_y[vy] for vy in values_y)
    entropy_xy = _entropy(
        p_xy[(vx, vy)] for vx in values_x for vy in values_y
    )

    denominator = entropy_x + entropy_y
    if denominator == 0:
        # Both variables are constant (or empty): no information to share.
        return 0.0
    return (denominator - entropy_xy) / denominator
def __init__(self, force_build=False):
    """Reset the preprocessor state and initialise the parent class.

    The keyword arguments forwarded to the parent ``__init__`` are produced
    by calling a ``PickleDef`` built from this instance; they are also kept
    on ``self.pickle_kwargs``.

    Args:
        force_build: forwarded positionally to the parent ``__init__``.
    """
    # Attributes are set before PickleDef(self) is built, as in the original.
    self.pipeline_steps = self.corpus_ = self.phrases = None
    self.pickle_kwargs = PickleDef(self)()
    super(Preprocessor, self).__init__(force_build, **self.pickle_kwargs)
def predict_api():
    """Serve a prediction for direct API calls made through a request.

    The values of the request's JSON body are collected into a list, passed
    through ``pd`` and given to ``model.predict`` as a one-element batch.
    The first prediction is returned via ``jsonify``.
    """
    payload = request.get_json(force=True)
    features = pd(list(payload.values()))
    return jsonify(model.predict([features])[0])
def _dump_csv(self, path):
    """Dump the information to a csv file.

    The object returned by ``pd()`` is rendered with ``to_csv()`` and the
    resulting text is written to ``path`` (the file is overwritten).

    Input:
        - path: the output file.
    """
    table = pd()
    with open(path, 'w') as out:
        out.write(table.to_csv())
def file_open(self):
    """Ask for a CSV file, merge it with ``export.csv`` and show the result.

    The chosen path is echoed to stdout and written into ``self.lineEdit``.
    The selected file is inner-merged with ``export.csv``, columns that
    contain missing values are dropped, and the result is installed as the
    model of ``self.tableView``. Nothing happens if the dialog is cancelled.
    """
    # QtWidgets-era bindings return a (path, selected_filter) pair.
    fileName, _selected_filter = QtWidgets.QFileDialog.getOpenFileName(
        Rule_Priority_test, 'Open csv', QtCore.QDir.rootPath(), '*.csv')
    if not fileName:
        # Dialog was cancelled: there is nothing to load.
        return
    print(fileName)

    # The original called lineEdit.text(fileName); text() is a getter, so the
    # path is stored with setText() and used directly.
    self.lineEdit.setText(fileName)
    path = fileName

    df1 = pd.read_csv(path)
    df2 = pd.read_csv('export.csv')
    df = pd.merge(df2, df1, how='inner').dropna(axis="columns")

    # NOTE(review): `pd` is used as the pandas module above but is *called*
    # here; a module is not callable. Presumably a Qt table-model class
    # wrapping the DataFrame was intended -- confirm and substitute it.
    model = pd(df)
    self.tableView.setModel(model)
def memory_cleaner():
    """Print how the process RSS changes while allocating and freeing a list.

    RSS is sampled four times: after an initial ``gc.collect()``, after
    building a 10**7-element list, after deleting it, and after a second
    ``gc.collect()``. Each printed figure is a difference between two
    samples, expressed as a percentage of the first sample.
    """
    process = psutil.Process(os.getpid())

    def rss():
        return process.memory_info().rss

    gc.collect()
    baseline = rss()

    # Build a list of 10**7 references, then drop it again.
    foo = ['abc' for x in range(10**7)]
    after_alloc = rss()

    del foo
    after_del = rss()

    gc.collect()
    after_gc = rss()

    def percent_delta(later, earlier):
        # Change between two samples, relative to the baseline RSS.
        return 100.0 * (later - earlier) / baseline

    print("Allocation: %0.2f%%" % percent_delta(after_alloc, baseline))
    print("Unreference: %0.2f%%" % percent_delta(after_del, after_alloc))
    print("Collect: %0.2f%%" % percent_delta(after_gc, after_del))
    print("Overall: %0.2f%%" % percent_delta(after_gc, baseline))
def crearCurvas():
    """Build sampled curves for sand, limestone, dolomite and shale.

    A polynomial is fitted through hard-coded control points for each
    material (degree 5 for sand and dolomite, degree 1 for limestone and
    shale) and then evaluated on a fixed grid.

    Returns:
        Tuple ``(arena, caliza, dolomita, lutita)`` of arrays with columns
        ``(x, y)``. The first three have 45 rows, ``lutita`` has 20.
    """
    puntos_arena = np.array([[-1.5, 2.66], [-1, 2.64], [0.5, 2.605],
                             [2, 2.57], [5, 2.51], [8, 2.46], [10, 2.43],
                             [20, 2.26], [25, 2.18], [31, 2.08], [36, 2.0],
                             [40.5, 1.92]])
    puntos_caliza = np.array([[3, 2.66], [36, 2.1]])
    puntos_dolomita = np.array([[4, 2.86], [8.5, 2.82], [13, 2.76],
                                [18, 2.68], [23, 2.59], [27, 2.5], [32, 2.4],
                                [36, 2.3], [38, 2.26], [41, 2.18], [44, 2.1]])
    puntos_lutita = np.array([[30, 2.8], [40, 2.6], [41, 2.5]])

    def ajustar(puntos, grado):
        # Least-squares polynomial through the control points.
        return np.poly1d(np.polyfit(puntos[:, 0], puntos[:, 1], grado))

    poli_arena = ajustar(puntos_arena, 5)
    poli_caliza = ajustar(puntos_caliza, 1)
    poli_dolomita = ajustar(puntos_dolomita, 5)
    # NOTE(review): the shale fit is computed but never evaluated; the shale
    # curve below is the limestone line shifted up by 0.1. Confirm whether
    # that is intentional before switching to this polynomial.
    poli_lutita = ajustar(puntos_lutita, 1)

    arena = np.array([[x, poli_arena(x)]
                      for x in (0.93 * i - 1.5 for i in range(45))])
    caliza = np.array([[i, poli_caliza(i)] for i in range(45)])
    dolomita = np.array([[x, poli_dolomita(x)]
                         for x in (0.94 * i + 2.5 for i in range(45))])
    lutita = np.array([[0.7 * i + 30, poli_caliza(i) + 0.1]
                       for i in range(20)])
    return (arena, caliza, dolomita, lutita)
def marginal_likelihood(self, W0):
    """Minimise the evidence objective by gradient descent from ``W0``.

    With ``C(w) = w wᵀ + sigx * I`` the objective is
    ``(n / 2) * log det C(w) + 0.5 * sum_i observed[i]ᵀ C(w)⁻¹ observed[i]``.
    The second value returned by ``util.gradient_descent`` is plotted as the
    learning curve.

    Args:
        W0: starting point handed to ``util.gradient_descent``.

    Returns:
        The first value returned by ``util.gradient_descent``.
    """
    noise_cov = self.sigx * np.identity(self.dimx)

    def covariance(w):
        return np.dot(w, w.transpose()) + noise_cov

    def log_det_term(w):
        return -(self.n / 2.0) * np.log(np.linalg.det(covariance(w)))

    def quadratic_form(w, i):
        precision = np.linalg.inv(covariance(w))
        sample = self.observed[i]
        return np.dot(np.dot(sample.transpose(), precision), sample)

    def evidence(w):
        data_term = sum(quadratic_form(w, i) for i in range(self.n))
        return - log_det_term(w) + 0.5 * data_term

    # Built as in the original; not used below.
    gradient = grad(evidence)

    ans, learning_curve = util.gradient_descent(evidence, W0)

    # plot learning curve
    plt.plot(learning_curve)
    plt.show()
    return ans
def _match_or_create_node(graph, label, name):
    """Create the ``label`` node called ``name`` if absent, then fetch it."""
    if not len(graph.nodes.match(label, name=name)):
        graph.create(Node(label, name=name))
    return graph.nodes.match(label, name=name).first()


def Grapy(np_data, graph):
    """Rebuild the device / vulnerability / patch graph from ``np_data``.

    The graph is wiped first. For every row, the first three fields are used
    as device, vulnerability and patch names. One node per distinct name is
    created under the labels ``device``, ``vul`` and ``patch``, and the three
    relationships ``device-vul``, ``vul-patch`` and ``patch-device`` are
    merged between them.

    Args:
        np_data: iterable of rows indexable at positions 0, 1 and 2.
        graph: graph handle providing ``delete_all``, ``nodes.match``,
            ``create`` and ``merge``.
    """
    graph.delete_all()

    for row in np_data:
        device, vul, patch = row[0], row[1], row[2]

        # device and vulnerability nodes, then the device -> vul edge
        device_node = _match_or_create_node(graph, 'device', device)
        vul_node = _match_or_create_node(graph, 'vul', vul)
        device_vul = Relationship.type("device-vul")
        graph.merge(device_vul(device_node, vul_node))

        # patch node, then the vul -> patch edge
        patch_node = _match_or_create_node(graph, 'patch', patch)
        vul_patch = Relationship.type("vul-patch")
        graph.merge(vul_patch(vul_node, patch_node))

        # patch -> device edge closes the triangle
        patch_device = Relationship.type("patch-device")
        graph.merge(patch_device(patch_node, device_node))
def __init__(self, file=None):
    """Set up the reader and optionally load one or more files.

    Args:
        file: a single file, a list of files, or ``None``. Entries already
            present in ``self.read_dfs`` are reported and skipped; the
            others are passed to ``__readFilesDataFrames`` one at a time.
    """
    # defines allowed extensions
    self.allowed_extensions = ['txt', 'csv']
    self.data_dir = DATA_DIR
    self.read_dfs = {}
    pickle_def = PickleDef(self)
    self.pickle_kwargs = pickle_def()
    super(Read, self).__init__(**self.pickle_kwargs)

    # logic to read in new files
    if not file:
        return
    if isinstance(file, list):
        for f in file:
            if f in self.read_dfs:
                print(f'{f} already has been read \n skipping {f}')
            else:
                # Read the current entry; the original passed the whole
                # list again on every iteration.
                self.__readFilesDataFrames(f)
    elif file in self.read_dfs:
        print(f'{file} already has been read \n using loaded object')
    else:
        self.__readFilesDataFrames(file)
def marginal_likelihood(self, W0):
    """Minimise the evidence objective by gradient descent from ``W0``.

    With ``C(w) = w wᵀ + sigx * I`` the objective is
    ``(n / 2) * log det C(w) + 0.5 * sum_i observed[i]ᵀ C(w)⁻¹ observed[i]``.
    The second value returned by ``util.gradient_descent`` is plotted, and
    its first and last entries are printed.

    Args:
        W0: starting point handed to ``util.gradient_descent``.

    Returns:
        The first value returned by ``util.gradient_descent``.
    """
    a = self.sigx*np.identity(self.dimx)
    win = lambda w: np.dot(w, w.transpose()) + a
    const = lambda w: -(self.n/2.0)*np.log( np.linalg.det(win(w)) )
    pdin = lambda w: np.linalg.inv( win(w) )
    pd = lambda w,i: np.dot(np.dot(self.observed[i].transpose(), pdin(w)), self.observed[i])
    final = lambda w: sum(pd(w, i) for i in range(self.n))
    evidence = lambda w: - const(w) + 0.5*final(w)
    # Built as in the original; not used below.
    gradient = grad(evidence)

    # A separate name keeps `a` (captured by `win`) bound to the matrix.
    ans, curve = util.gradient_descent(evidence, W0)
    plt.plot(curve)
    # Same text as the Python 2 statement `print a[0], a[-1]`, but valid on
    # both Python 2 and Python 3.
    print("%s %s" % (curve[0], curve[-1]))
    plt.show()
    return ans
'''
Converting Tables in PDF into csv
by Mukhamad Suhermanto

This code is used to convert table in PDF file into csv format
'''
from tabula import read_pdf
import pandas as pd

pdfFileName = input("Enter your file name here:")
pdfFolderName = input("Enter your folder name where the PDF from here:")
pageNum = int(input(
    "Enter the page number where the table you want to extract is, here:"))

tables = read_pdf(pdfFolderName + "/" + pdfFileName + ".pdf", pages=pageNum)
# The original called `pd(df)`, i.e. the pandas module itself, which raises
# TypeError. read_pdf returns either one DataFrame or a list of them,
# depending on the tabula-py version and options, so both are normalised
# into a single DataFrame.
if isinstance(tables, list):
    df = pd.concat(tables, ignore_index=True) if tables else pd.DataFrame()
else:
    df = pd.DataFrame(tables)

csvFileName = input("Enter the csv file name you want to save:")
csvFolderName = input(
    "Enter the name of the folder where you want to save your created csv file:"
)
df.to_csv(csvFolderName + "/" + csvFileName + ".csv", index=False)
import pandas as pd
import numpy as np
import sklearn as sk
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Load the iris dataset and print what it contains.
iris = datasets.load_iris()
print(iris)
print(iris.data)
print(iris.keys())
print(iris.target)
print(iris.target_names)
print(iris.DESCR)
print(iris.data.shape)
print(iris.data.dtype)

# One-column frame holding the first feature. The original called `pd(...)`,
# i.e. the pandas module itself, which raises TypeError.
pd_X = pd.DataFrame({iris.feature_names[0]: iris.data[:, 0]})
print(pd_X.iloc[:10, :])
'estimators_', 'feature_importances_', 'oob_score_', 'oob_prediction_', # for random forest ) return { name: getattr(self.model, name, None) for name in attribute_names } def extract_and_transform(self, samples, transform_y=True): 'return X and y' result = self.implementation_module.extract_and_transform( self, samples, transform_y) return result def predict(self, samples): X_test, y_test = self.extract_and_transform(samples, transform_y=False) assert y_test is None return self.implementation_module.predict(self, X_test) def setattr(self, parameter, value): setattr(self, parameter, value) return self if False: pd() pprint() Features()
3 = Can apply with 20% help from Google 4 = Can teach <20% help from Google 5 = Can design, review, optimize What is your average score? What areas are you doing great in, what areas do you need to study? This Project will grow and be designed and redesigned several different ways as you progress and revisit.''' ############################################################################### ## BONUS ## ## Pandas reading files , Connecting to AWS ## ############################################################################### ''' Pandas is something we will explore more in Python for Data III However here is a small look into how Pandas can read files Read Data From a file Let's take a look at what the code is going to do. with most code you have to be able to read it backwards. Yoda talk. so reading this backwards we are: taking a counties.csv file and reading it using the read_csv function from pd(pandas) and then loading that data into a variable called a. a = pd.read_csv('~/Desktop/DataSets/counties.csv') You will want to adjust the link to navigate to where your file is located.''' # read in the counties data import pandas as pd a = pd.read_csv('~Matthew\Desktop\Datasets\counties.csv') a
def open(self):
    """Call ``pd()`` and bind the result to a local name.

    NOTE(review): the result is neither returned nor stored on ``self``;
    this looks like an unfinished stub -- confirm the intent.
    """
    loaded = pd()
def __init__(self):
    """Reset ``corpus_`` and initialise the parent class.

    The parent's keyword arguments come from calling a ``PickleDef`` built
    from this instance; they are also kept on ``self.pickle_kwargs``.
    """
    self.corpus_ = None
    kwargs_factory = PickleDef(self)
    self.pickle_kwargs = kwargs_factory()
    super().__init__(**self.pickle_kwargs)
## ''' PROJECT EXERCISE Create a file that lists all the skills you have learned so far rate yourself for each skill in a separate column next to the skill 0 = Don't know 1 = Acquiring knowledge 2 = Can apply with 80% help from Google 3 = Can apply with 20% help from Google 4 = Can teach <20% help from Google 5 = Can design, review, optimize What is your average score? What areas are you doing great in, what areas do you need to study? This Project will grow and be designed and redesigned several different ways as you progress and revisit.''' ############################################################################### ## BONUS ## ## Pandas reading files , Connecting to AWS ## ############################################################################### ''' Pandas is something we will explore more in Python for Data III However here is a small look into how Pandas can read files Read Data From a file Let's take a look at what the code is going to do. with most code you have to
def __init__(self):
    """Reset ``phrase_model`` and initialise the parent class.

    The parent's keyword arguments come from calling a ``PickleDef`` built
    from this instance; they are also kept on ``self.pickle_kwargs``.
    """
    self.phrase_model = None
    kwargs_factory = PickleDef(self)
    self.pickle_kwargs = kwargs_factory()
    super(PhraseModel, self).__init__(**self.pickle_kwargs)
X_train, y_train = self.extract_and_transform(train_df) self.implementation_module.fit(self, X_train, y_train) def get_attributes(self): 'return both sets of attributes, with None if not used by that model' pdb.set_trace() attribute_names = ( 'coef_', 'sparse_coef_', 'intercept_', 'n_iter_', # for linear 'estimators_', 'feature_importances_', 'oob_score_', 'oob_prediction_', # for random forest ) return {name: getattr(self.model, name, None) for name in attribute_names} def extract_and_transform(self, df, transform_y=True): 'return X and y' return self.implementation_module.extract_and_transform(self, df, transform_y) def predict(self, test_df): X_test, y_test = self.extract_and_transform(test_df, transform_y=False) assert y_test is None return self.implementation_module.predict(self, X_test) def setattr(self, parameter, value): setattr(self, parameter, value) return self if False: pd() pprint() Features()
# Sampling step handed to `pd` as the rate 1 / T.
T = 1e-3
GPTI = np.loadtxt(
    '/Users/kimhedelin/Google Drive/VT18/Neuroscience/simulation/BGnetwork/sample/BasalFiring/data/GPTI-15149-0.gdf'
)
STN = np.loadtxt(
    '/Users/kimhedelin/Google Drive/VT18/Neuroscience/simulation/BGnetwork/sample/BasalFiring/data/STN-15146-0.gdf'
)

# Histogram bin edges 0, 1, ..., 999.
ed = np.arange(0, 1000, 1)

# Only the data and the bin edges are passed. The other keywords were all
# given as None (their defaults), and `normed` no longer exists in recent
# NumPy releases, so passing it raises TypeError there.
STNp, yyed = np.histogram(STN[200:, 1], ed)

# NOTE(review): `pd` is called as (signal, rate, window name) and returns a
# pair that is plotted as x/y -- presumably a periodogram helper; confirm.
fp, yp = pd(STNp, 1 / T, 'hann')

plt.subplot(711)
plt.plot(fp, np.log10(yp))
plt.grid()
plt.title('STN')

plt.subplot(712)
GPTIp, yyed = np.histogram(GPTI[200:, 1], ed)
def csvEditProcess():
    """Re-export ``./edit/intel_gen_2.csv`` with whitespace-stripped headers.

    The first line of the file is split on commas and each name is stripped.
    The remaining lines are read through ``csv.DictReader`` with those names,
    the reader is passed to ``pd`` and the result is written with
    ``to_csv(csvFile)``.
    """
    with open('./edit/intel_gen_2.csv') as fh:
        # next(fh) works on Python 2.6+ and Python 3; the original fh.next()
        # exists only on Python 2 file objects.
        header = [h.strip() for h in next(fh).split(',')]
        reader = csv.DictReader(fh, fieldnames=header)
        # NOTE(review): `pd` is called directly on the reader; if `pd` is the
        # pandas module here this raises TypeError and `pd.DataFrame(reader)`
        # was presumably intended -- confirm.
        df = pd(reader)
        df.to_csv(csvFile)
def __init__(self):
    """Reset ``df_train`` and ``models`` and initialise the parent class.

    The parent's keyword arguments come from calling a ``PickleDef`` built
    from this instance.
    """
    self.df_train = self.models = None
    kwargs_factory = PickleDef(self)
    super().__init__(**kwargs_factory())
"""`main` is the top level module for your Flask application.""" # Import the Flask Framework from flask import Flask import pydrive as pd gauth = GoogleAuth() fm = pd(gauth) app = Flask(__name__) # Note: We don't need to call run() since our application is embedded within # the App Engine WSGI application server. @app.route('/') def hello(): """Return a friendly HTTP greeting.""" return 'Hello World!' @app.errorhandler(404) def page_not_found(e): """Return a custom 404 error.""" return 'Sorry, Nothing at this URL.', 404 @app.errorhandler(500) def application_error(e): """Return a custom 500 error.""" return 'Sorry, unexpected error: {}'.format(e), 500
T = 1e-3 GPTI = np.loadtxt( '/Users/kimhedelin/Google Drive/VT18/Neuroscience/simulation/BGnetwork/sample/oscilating/data/GPTI-15149-0.gdf' ) STN = np.loadtxt( '/Users/kimhedelin/Google Drive/VT18/Neuroscience/simulation/BGnetwork/sample/oscilating/data/STN-15146-0.gdf' ) ed = np.arange(0, 1000, 1) STNp, yyed = np.histogram(STN[:, 1], ed, range=None, normed=None, weights=None, density=None) #print(yy) fp, yp = pd(STNp, 1 / T, 'hamming', scaling='spectrum', nfft=32) #xf = np.linspace(0.0, 1.0/(2.0*T), N//2) #print(int(N//2) #plt.plot(xf, 2.0/N * np.abs(yf[0:int(N//2)])) plt.figure(1) plt.plot(fp, np.log10(yp)) plt.plot(fp, np.log10(45 * 1 / fp)) plt.grid() plt.title('STN') plt.figure(2) plt.subplot(712) GPTIp, yyed = np.histogram(GPTI[:, 1], ed, range=None, normed=None,
# close the file when done f.close() # Open the file back up and read the contents f = open("textfile.txt","r") if f.mode == 'r': # check to make sure that the file was opened # use the read() function to read the entire file # contents = f.read() # print contents fl = f.readlines() # readlines reads the individual lines into a list for x in fl: print x ''' Read Data From a file Let's take a look at what the code is going to do. with most code you have to be able to read it backwards. Yoda talk. so reading this backwards we are: taking a drinks.csv file and reading it using the read_csv function from pd(pandas) and then loading that data into a variable called drinks. drinks = pd.read_csv('~/Desktop/DataScience/drinks.csv') You will want to adjust the link to navigate to where your file is located.''' # read in the drinks data import pandas as pd a = pd.read_csv('~\multiline.csv') a '''PROJECT EXERCISE
import statsmodels.tsa as tsa
import statsmodels.api as sm
from statsmodels.sandbox.regression.predstd import wls_prediction_std

period = 90
inputfile = "../Datasets/YPFD.2000.2021.csv"
print('Input file is "', inputfile)

df1 = pd.read_csv(inputfile)
df1['fechaHora'] = pd.DatetimeIndex(df1['fechaHora'])
df1 = df1[['fechaHora', 'ultimoPrecio']]

# Linear Regression
df = df1
nsample = len(df)
# Timestamps converted to numbers for use as the regressor. The original
# wrapped the column in `pd(...)`, i.e. a call on the pandas module, which
# raises TypeError.
x = pd.to_numeric(df1['fechaHora'])
X = sm.add_constant(x)
y = np.array(df1['ultimoPrecio'])

model = sm.OLS(y, X)
res = model.fit()
print(res.summary())

# Prediction standard error with lower / upper interval bounds.
prstd, iv_l, iv_u = wls_prediction_std(res)

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(x, y, 'o', label="data")
ax.plot(x, res.fittedvalues, 'r--.', label="OLS")
ax.plot(x, iv_u, 'r--')
ax.plot(x, iv_l, 'r--')
ax.legend(loc='best')