def mutual_info(x, y):
    import numpy as np
    # discretize_feature, pd (marginal distribution) and pD (joint
    # distribution) are helpers defined elsewhere in the source module
    x = discretize_feature(x)
    rX = np.unique(x)
    rY = np.unique(y)

    pX = pd(x)
    pY = pd(y)
    pX_Y = pD(x, y)

    # H(X): entropy of x
    Aentropy = 0.0
    for t1 in rX:
        if pX[t1] != 0:
            Aentropy -= pX[t1] * np.log2(pX[t1])

    # H(Y): entropy of y
    Bentropy = 0.0
    for t1 in rY:
        if pY[t1] != 0:
            Bentropy -= pY[t1] * np.log2(pY[t1])

    # H(X, Y): joint entropy
    ABentropy = 0.0
    for t1 in rX:
        for t2 in rY:
            if pX_Y[(t1, t2)] != 0:
                ABentropy -= pX_Y[(t1, t2)] * np.log2(pX_Y[(t1, t2)])

    # normalized mutual information: (H(X) + H(Y) - H(X, Y)) / (H(X) + H(Y))
    return (Aentropy + Bentropy - ABentropy) / (Aentropy + Bentropy)
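# The helpers `pd` and `pD` are not shown in this example. Judging from how
# they are indexed above, `pd` maps each value to its marginal probability and
# `pD` maps value pairs to joint probabilities. A minimal sketch of what they
# might look like (names and behavior are assumptions, not the original code):
import numpy as np
from collections import defaultdict

def pd(x):
    # hypothetical marginal distribution: value -> empirical probability
    values, counts = np.unique(x, return_counts=True)
    return dict(zip(values, counts / len(x)))

def pD(x, y):
    # hypothetical joint distribution; defaultdict returns 0.0 for pairs
    # that never occur, matching the `!= 0` checks above
    probs = defaultdict(float)
    for pair in zip(x, y):
        probs[pair] += 1.0 / len(x)
    return probs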
Example #2
def __init__(self, force_build=False):
    self.pipeline_steps = None
    self.corpus_ = None
    self.phrases = None
    pd = PickleDef(self)
    self.pickle_kwargs = pd()
    super(Preprocessor, self).__init__(force_build, **self.pickle_kwargs)
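# PickleDef itself is not shown on this page. From the recurring pattern
# (construct with the owning instance, call it to get kwargs for a pickling
# base class) it presumably derives pickle settings from the instance. A
# purely hypothetical stand-in, only to make the pattern concrete:
class PickleDef:
    def __init__(self, obj):
        self.obj = obj

    def __call__(self):
        # e.g. name the pickle file after the owning class (assumption)
        return {'pickle_path': type(self.obj).__name__.lower() + '.pkl'}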
Example #3
def predict_api():
    '''
    For direct API calls through request
    '''
    # request/jsonify come from flask; `model` and `pd` are defined elsewhere
    data = request.get_json(force=True)
    prediction = model.predict([pd(list(data.values()))])

    output = prediction[0]
    return jsonify(output)
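# The route decorator is not shown. Assuming the view is bound to a route such
# as '/predict_api' (hypothetical), a client-side call might look like this:
import requests

resp = requests.post('http://localhost:5000/predict_api',
                     json={'feature_1': 1.0, 'feature_2': 2.5})
print(resp.json())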
Example #4
    def _dump_csv(self, path):
        """Dump the information to a csv file.

        Input:
            - path: the output file.
        """
        # `pd` builds the DataFrame to dump; it is defined elsewhere
        dataframe = pd()
        with open(path, 'w') as f:
            f.write(dataframe.to_csv())
Example #5
def file_open(self):
    # getOpenFileName returns a (path, filter) tuple in PyQt5
    fileName, _ = QtWidgets.QFileDialog.getOpenFileName(
        Rule_Priority_test, 'Open csv', QtCore.QDir.rootPath(), '*.csv')
    print(fileName)
    self.lineEdit.setText(fileName)
    path = self.lineEdit.text()
    df1 = pd.read_csv(path)
    df2 = pd.read_csv('export.csv')
    df = pd.merge(df2, df1, how='inner').dropna(axis="columns")
    # here `pd` is also used as a DataFrame-to-Qt-model factory; in the
    # original project it is presumably a pandas-backed table model class
    model = pd(df)
    self.tableView.setModel(model)
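# `model = pd(df)` implies a Qt table model wrapping a DataFrame. A minimal
# read-only sketch of such a wrapper (a common pattern, not this project's code):
from PyQt5 import QtCore

class DataFrameModel(QtCore.QAbstractTableModel):
    def __init__(self, df, parent=None):
        super().__init__(parent)
        self._df = df

    def rowCount(self, parent=QtCore.QModelIndex()):
        return len(self._df)

    def columnCount(self, parent=QtCore.QModelIndex()):
        return len(self._df.columns)

    def data(self, index, role=QtCore.Qt.DisplayRole):
        if index.isValid() and role == QtCore.Qt.DisplayRole:
            return str(self._df.iloc[index.row(), index.column()])
        return None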
Example #6
import gc
import os
import psutil

def memory_cleaner():
    proc = psutil.Process(os.getpid())
    gc.collect()
    mem0 = proc.memory_info().rss

    # create a list holding approx. 10**7 references to the same string
    foo = ['abc' for x in range(10**7)]
    mem1 = proc.memory_info().rss

    # drop the only reference to the list
    del foo
    mem2 = proc.memory_info().rss

    # force a collection so freed memory can be returned to the allocator
    gc.collect()
    mem3 = proc.memory_info().rss

    # percentage delta relative to the starting RSS
    pd = lambda x2, x1: 100.0 * (x2 - x1) / mem0
    print("Allocation: %0.2f%%" % pd(mem1, mem0))
    print("Unreference: %0.2f%%" % pd(mem2, mem1))
    print("Collect: %0.2f%%" % pd(mem3, mem2))
    print("Overall: %0.2f%%" % pd(mem3, mem0))
Example #7
        def crearCurvas():
            # control points for each lithology curve
            pArena = np.array([[-1.5, 2.66], [-1, 2.64], [0.5, 2.605],
                               [2, 2.57], [5, 2.51], [8, 2.46], [10, 2.43],
                               [20, 2.26], [25, 2.18], [31, 2.08], [36, 2.0],
                               [40.5, 1.92]])
            pCaliza = np.array([[3, 2.66], [36, 2.1]])
            pDolomita = np.array([[4, 2.86], [8.5, 2.82], [13, 2.76],
                                  [18, 2.68], [23, 2.59], [27, 2.5],
                                  [32, 2.4], [36, 2.3], [38, 2.26],
                                  [41, 2.18], [44, 2.1]])
            pLutita = np.array([[30, 2.8], [40, 2.6], [41, 2.5]])

            arena = []
            caliza = []
            dolomita = []
            lutita = []

            # fit a polynomial through each set of control points
            za = np.polyfit(pArena[:, 0], pArena[:, 1], 5)
            pa = np.poly1d(za)

            zc = np.polyfit(pCaliza[:, 0], pCaliza[:, 1], 1)
            pc = np.poly1d(zc)

            zd = np.polyfit(pDolomita[:, 0], pDolomita[:, 1], 5)
            pd = np.poly1d(zd)

            zl = np.polyfit(pLutita[:, 0], pLutita[:, 1], 1)
            pl = np.poly1d(zl)

            # sample each fitted curve over its plotting range
            for i in range(45):
                arena.append([0.93 * i - 1.5, pa(0.93 * i - 1.5)])
                caliza.append([1 * i, pc(1 * i)])
                dolomita.append([0.94 * i + 2.5, pd(0.94 * i + 2.5)])
            for i in range(20):
                # use the lutita fit; the original evaluated pc here and
                # left pl unused, which looks like a bug
                lutita.append([0.7 * i + 30, pl(0.7 * i + 30)])

            arena = np.array(arena)
            caliza = np.array(caliza)
            dolomita = np.array(dolomita)
            lutita = np.array(lutita)

            return (arena, caliza, dolomita, lutita)
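# A quick way to eyeball the returned curves, assuming crearCurvas is in scope
# (a usage sketch; the labels are the function's own variable names):
import matplotlib.pyplot as plt

arena, caliza, dolomita, lutita = crearCurvas()
for curva, nombre in [(arena, 'arena'), (caliza, 'caliza'),
                      (dolomita, 'dolomita'), (lutita, 'lutita')]:
    plt.plot(curva[:, 0], curva[:, 1], label=nombre)
plt.legend()
plt.show()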
Example #8
    def marginal_likelihood(self, W0):
        # C(w) = w w^T + sigx * I is the marginal covariance of the data
        a = self.sigx * np.identity(self.dimx)

        win = lambda w: np.dot(w, w.transpose()) + a
        const = lambda w: -(self.n / 2.0) * np.log(np.linalg.det(win(w)))

        pdin = lambda w: np.linalg.inv(win(w))

        # quadratic form x_i^T C(w)^-1 x_i for observation i
        pd = lambda w, i: np.dot(np.dot(self.observed[i].transpose(), pdin(w)), self.observed[i])

        final = lambda w: sum(pd(w, i) for i in range(self.n))

        # negative log evidence, up to an additive constant
        evidence = lambda w: -const(w) + 0.5 * final(w)
        gradient = grad(evidence)  # autograd gradient; computed but unused below

        ans, a = util.gradient_descent(evidence, W0)
        # plot learning curve
        plt.plot(a)
        plt.show()

        return ans
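# The quantity minimized above is the negative log evidence of a
# linear-Gaussian model: -sum_i log N(x_i; 0, w w^T + sigx*I), up to the
# additive constant (n*d/2)*log(2*pi). A small sanity check against scipy
# (a sketch, assuming `observed` is an (n, d) array):
import numpy as np
from scipy.stats import multivariate_normal

def neg_log_evidence(w, observed, sigx):
    cov = w @ w.T + sigx * np.identity(w.shape[0])
    return -sum(multivariate_normal.logpdf(x, mean=np.zeros(w.shape[0]), cov=cov)
                for x in observed)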
Example #9
def Grapy(np_data, graph):

    graph.delete_all()
    # tx = graph.begin()
    # Create Node objects: each node has a label and properties,
    # e.g. a 'device' node with a 'name' property

    for row in np_data:
        device, vul, patch = row[0], row[1], row[2]

        # device node
        if not len(graph.nodes.match("device", name=device)):
            denode = Node('device', name=device)
            graph.create(denode)
        res_device = graph.nodes.match('device', name=device).first()

        # vulnerability node
        if not len(graph.nodes.match("vul", name=vul)):
            vulnode = Node('vul', name=vul)
            graph.create(vulnode)
        res_vul = graph.nodes.match('vul', name=vul).first()

        # device -> vulnerability relationship
        dv = Relationship.type("device-vul")
        graph.merge(dv(res_device, res_vul))

        # patch node
        if not len(graph.nodes.match("patch", name=patch)):
            patchnode = Node('patch', name=patch)
            graph.create(patchnode)
        res_patch = graph.nodes.match('patch', name=patch).first()

        # vulnerability -> patch relationship
        vp = Relationship.type("vul-patch")
        graph.merge(vp(res_vul, res_patch))

        # patch -> device relationship
        pd = Relationship.type("patch-device")
        graph.merge(pd(res_patch, res_device))
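# A hedged usage sketch with py2neo's Graph, Node, and Relationship; the
# connection settings and rows are placeholders:
import numpy as np
from py2neo import Graph, Node, Relationship

graph = Graph("bolt://localhost:7687", auth=("neo4j", "password"))
rows = np.array([["router-01", "CVE-2021-0001", "patch-123"],
                 ["server-02", "CVE-2021-0002", "patch-456"]])
Grapy(rows, graph)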
Example #10
def __init__(self, file=None):
    # defines allowed extensions
    self.allowed_extensions = ['txt', 'csv']
    self.data_dir = DATA_DIR
    self.read_dfs = {}
    pd = PickleDef(self)
    self.pickle_kwargs = pd()
    super(Read, self).__init__(**self.pickle_kwargs)
    # logic to read in new files
    if file:
        if type(file) == list:
            for f in file:
                if f in self.read_dfs:
                    print(f'{f} already has been read \n skipping {f}')
                else:
                    # read the individual file, not the whole list
                    self.__readFilesDataFrames(f)
        else:
            if file in self.read_dfs:
                print(f'{file} already has been read \n using loaded object')
            else:
                self.__readFilesDataFrames(file)
Example #11
    def marginal_likelihood(self, W0):
        a = self.sigx * np.identity(self.dimx)

        win = lambda w: np.dot(w, w.transpose()) + a
        const = lambda w: -(self.n / 2.0) * np.log(np.linalg.det(win(w)))

        pdin = lambda w: np.linalg.inv(win(w))

        pd = lambda w, i: np.dot(np.dot(self.observed[i].transpose(), pdin(w)), self.observed[i])

        final = lambda w: sum(pd(w, i) for i in range(self.n))

        # negative log evidence, up to an additive constant (see Example #8)
        evidence = lambda w: -const(w) + 0.5 * final(w)
        gradient = grad(evidence)  # computed but unused below

        ans, a = util.gradient_descent(evidence, W0)
        # plot learning curve and report first/last loss values
        plt.plot(a)
        print(a[0], a[-1])
        plt.show()

        return ans
Example #12
'''
Converting tables in a PDF into CSV
by Mukhamad Suhermanto

This code converts a table in a PDF file into CSV format.
'''

from tabula import read_pdf
import pandas as pd

pdfFileName = input("Enter your file name here:")
pdfFolderName = input("Enter your folder name where the PDF from here:")
pageNum = input(
    "Enter the page number where the table you want to extract is, here:")
pageNum = int(pageNum)
df = read_pdf(pdfFolderName + "/" + pdfFileName + ".pdf", pages=pageNum)
# read_pdf returns a list of DataFrames (one per detected table); with pandas
# imported as pd, the original pd(df) is not callable, so take the first table
df = df[0]
csvFileName = input("Enter the csv file name you want to save:")
csvFolderName = input(
    "Enter the name of the folder where you want to save your created csv file:"
)
df.to_csv(csvFolderName + "/" + csvFileName + ".csv", index=False)
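# If the page holds several tables, read_pdf returns all of them; a hedged
# sketch for stitching the list into one CSV (paths are placeholders):
import pandas as pd
from tabula import read_pdf

tables = read_pdf("docs/report.pdf", pages=3)
combined = pd.concat(tables, ignore_index=True)
combined.to_csv("out/tables.csv", index=False)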
Example #13
import pandas as pd
import numpy as np
import sklearn as sk
from sklearn import datasets
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()
print(iris)
print(iris.data)
print(iris.keys())
print(iris.target)
print(iris.target_names)
print(iris.DESCR)
print(iris.data.shape)
print(iris.data.dtype)
# with pandas imported as pd, a bare pd(...) call is not valid; build the
# column with pd.DataFrame instead
pd_X = pd.DataFrame({iris.feature_names[0]: iris.data[:, 0]})
print(pd_X.iloc[:10, :])
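# Building the whole feature frame at once is simpler than one column at a
# time; a short sketch using the standard pandas API:
import pandas as pd
from sklearn import datasets

iris = datasets.load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['target'] = iris.target
print(df.head(10))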
Example #14
            'estimators_',
            'feature_importances_',
            'oob_score_',
            'oob_prediction_',  # for random forest
        )
        return {
            name: getattr(self.model, name, None)
            for name in attribute_names
        }

    def extract_and_transform(self, samples, transform_y=True):
        'return X and y'
        result = self.implementation_module.extract_and_transform(
            self, samples, transform_y)
        return result

    def predict(self, samples):
        X_test, y_test = self.extract_and_transform(samples, transform_y=False)
        assert y_test is None
        return self.implementation_module.predict(self, X_test)

    def setattr(self, parameter, value):
        setattr(self, parameter, value)
        return self


if False:
    pd()
    pprint()
    Features()
Example #15

'''PROJECT EXERCISE
Create a file that lists all the skills you have learned so far;
rate yourself for each skill in a separate column next to the skill.
0 = Don't know
1 = Acquiring knowledge
2 = Can apply with 80% help from Google
3 = Can apply with 20% help from Google
4 = Can teach with <20% help from Google
5 = Can design, review, optimize
What is your average score?
What areas are you doing great in, and what areas do you need to study?
This project will grow and be designed and redesigned several different ways
as you progress and revisit.'''




###############################################################################
##                               BONUS                                       ## 
## Pandas reading files  , Connecting to AWS                                 ##
###############################################################################
'''
Pandas is something we will explore more in Python for Data III.
However, here is a small look into how Pandas can read files.

Read data from a file
Let's take a look at what the code is going to do. With most code you have to
be able to read it backwards. Yoda talk. So, reading this backwards, we are
taking a counties.csv file and reading it using the read_csv function from
pd (pandas) and then loading that data into a variable called a.
a = pd.read_csv('~/Desktop/DataSets/counties.csv')
You will want to adjust the link to navigate to where your file is located.'''

# read in the counties data
import pandas as pd
# use a raw string so the backslashes are not treated as escape sequences
a = pd.read_csv(r'~Matthew\Desktop\Datasets\counties.csv')
a
Example #16
def open(self):
    data = pd()
Example #17
def __init__(self):
    self.corpus_ = None
    pd = PickleDef(self)
    self.pickle_kwargs = pd()
    super().__init__(**self.pickle_kwargs)
##




''' PROJECT EXERCISE
Create a file that lists all the skills you have learned so far;
rate yourself for each skill in a separate column next to the skill.
0 = Don't know
1 = Acquiring knowledge
2 = Can apply with 80% help from Google
3 = Can apply with 20% help from Google
4 = Can teach with <20% help from Google
5 = Can design, review, optimize
What is your average score?
What areas are you doing great in, and what areas do you need to study?
This project will grow and be designed and redesigned several different ways
as you progress and revisit.'''




###############################################################################
##                               BONUS                                       ## 
## Pandas reading files  , Connecting to AWS                                 ##
###############################################################################
'''
Pandas is something we will explore more in Python for Data III.
However, here is a small look into how Pandas can read files.

Read data from a file
Let's take a look at what the code is going to do. With most code you have to
Example #19
def __init__(self):
    self.phrase_model = None
    pd = PickleDef(self)
    self.pickle_kwargs = pd()
    super(PhraseModel, self).__init__(**self.pickle_kwargs)
Example #20
        X_train, y_train = self.extract_and_transform(train_df)
        self.implementation_module.fit(self, X_train, y_train)

    def get_attributes(self):
        'return both sets of attributes, with None if not used by that model'
        pdb.set_trace()  # debugging breakpoint left in by the author
        attribute_names = (
            'coef_', 'sparse_coef_', 'intercept_', 'n_iter_',                        # for linear
            'estimators_', 'feature_importances_', 'oob_score_', 'oob_prediction_',  # for random forest
        )
        return {name: getattr(self.model, name, None) for name in attribute_names}

    def extract_and_transform(self, df, transform_y=True):
        'return X and y'
        return self.implementation_module.extract_and_transform(self, df, transform_y)

    def predict(self, test_df):
        X_test, y_test = self.extract_and_transform(test_df, transform_y=False)
        assert y_test is None
        return self.implementation_module.predict(self, X_test)

    def setattr(self, parameter, value):
        setattr(self, parameter, value)
        return self


if False:
    pd()
    pprint()
    Features()
Example #21
import numpy as np
import matplotlib.pyplot as plt
# `pd` here computes a power spectrum; see the import sketch after this example

T = 1e-3
GPTI = np.loadtxt(
    '/Users/kimhedelin/Google Drive/VT18/Neuroscience/simulation/BGnetwork/sample/BasalFiring/data/GPTI-15149-0.gdf'
)
STN = np.loadtxt(
    '/Users/kimhedelin/Google Drive/VT18/Neuroscience/simulation/BGnetwork/sample/BasalFiring/data/STN-15146-0.gdf'
)
ed = np.arange(0, 1000, 1)
# bin the spike times into a 1 ms rate histogram
STNp, yyed = np.histogram(STN[200:, 1], ed)
fp, yp = pd(STNp, 1 / T, 'hann')
plt.subplot(711)
plt.plot(fp, np.log10(yp))
plt.grid()
plt.title('STN')

plt.subplot(712)
GPTIp, yyed = np.histogram(GPTI[200:, 1], ed)
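# For this snippet to run, `pd` must be a spectral-estimation function. Its
# call signature matches scipy.signal.periodogram (x, fs, window), so the
# original import was presumably something like:
from scipy.signal import periodogram as pd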
Example #22
import csv

def csvEditProcess():
    with open('./edit/intel_gen_2.csv') as fh:
        # fh.next() is Python 2; use next(fh) to grab the header row
        header = [h.strip() for h in next(fh).split(',')]
        reader = csv.DictReader(fh, fieldnames=header)
        # `pd` builds a DataFrame from the reader; `csvFile` is the output
        # path, defined elsewhere
        df = pd(reader)
        df.to_csv(csvFile)
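# pandas can do the same job without the csv module; a hedged equivalent
# (input path reused from the snippet, output path hypothetical):
import pandas as pd

df = pd.read_csv('./edit/intel_gen_2.csv', skipinitialspace=True)
df.to_csv('./edit/intel_gen_2_out.csv', index=False)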
Example #23
def __init__(self):
    self.df_train = None
    self.models = None
    pd = PickleDef(self)
    super().__init__(**pd())
Example #24
"""`main` is the top level module for your Flask application."""

# Import the Flask Framework
from flask import Flask
import pydrive as pd
gauth = GoogleAuth()
fm = pd(gauth)
app = Flask(__name__)
# Note: We don't need to call run() since our application is embedded within
# the App Engine WSGI application server.


@app.route('/')
def hello():
    """Return a friendly HTTP greeting."""
    return 'Hello World!'


@app.errorhandler(404)
def page_not_found(e):
    """Return a custom 404 error."""
    return 'Sorry, Nothing at this URL.', 404


@app.errorhandler(500)
def application_error(e):
    """Return a custom 500 error."""
    return 'Sorry, unexpected error: {}'.format(e), 500
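# A working PyDrive setup looks roughly like this (a sketch; the auth flow
# and file title are placeholders):
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive

gauth = GoogleAuth()
gauth.LocalWebserverAuth()  # opens a browser for the OAuth consent flow
drive = GoogleDrive(gauth)

f = drive.CreateFile({'title': 'hello.txt'})
f.SetContentString('Hello from PyDrive')
f.Upload()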
Example #25
import numpy as np
import matplotlib.pyplot as plt
# as in Example #21, `pd` is used like scipy.signal.periodogram

T = 1e-3
GPTI = np.loadtxt(
    '/Users/kimhedelin/Google Drive/VT18/Neuroscience/simulation/BGnetwork/sample/oscilating/data/GPTI-15149-0.gdf'
)
STN = np.loadtxt(
    '/Users/kimhedelin/Google Drive/VT18/Neuroscience/simulation/BGnetwork/sample/oscilating/data/STN-15146-0.gdf'
)
ed = np.arange(0, 1000, 1)
# bin spike times into a 1 ms rate histogram
STNp, yyed = np.histogram(STN[:, 1], ed)
fp, yp = pd(STNp, 1 / T, 'hamming', scaling='spectrum', nfft=32)
plt.figure(1)
plt.plot(fp, np.log10(yp))
plt.plot(fp, np.log10(45 * 1 / fp))  # 1/f reference line
plt.grid()
plt.title('STN')

plt.figure(2)
plt.subplot(712)
GPTIp, yyed = np.histogram(GPTI[:, 1], ed)
Example #26
  # close the file when done
  f.close()

  # Open the file back up and read the contents
  f = open("textfile.txt", "r")
  if f.mode == 'r':  # check to make sure that the file was opened
    # use the read() function to read the entire file
    # contents = f.read()
    # print(contents)

    fl = f.readlines()  # readlines reads the individual lines into a list
    for x in fl:
      print(x)


''' Read data from a file
Let's take a look at what the code is going to do. With most code you have to
be able to read it backwards. Yoda talk. So, reading this backwards, we are
taking a drinks.csv file and reading it using the read_csv function from
pd (pandas) and then loading that data into a variable called drinks.
drinks = pd.read_csv('~/Desktop/DataScience/drinks.csv')
You will want to adjust the link to navigate to where your file is located.'''

# read in the drinks data
import pandas as pd
# use a raw string so the backslash is not treated as an escape sequence
a = pd.read_csv(r'~\multiline.csv')
a



'''PROJECT EXERCISE
Example #27
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.tsa as tsa
import statsmodels.api as sm

period = 90

inputfile = "../Datasets/YPFD.2000.2021.csv"
print('Input file is', inputfile)
df1 = pd.read_csv(inputfile)
df1['fechaHora'] = pd.DatetimeIndex(df1['fechaHora'])
df1 = df1[['fechaHora', 'ultimoPrecio']]

# Linear Regression

df = df1
nsample = len(df)
# with pandas imported as pd, the original bare pd(...) call would fail;
# converting the datetime column directly is the likely intent
x = pd.to_numeric(df1['fechaHora'])
X = x  #X = np.column_stack(x)
X = sm.add_constant(X)
y = np.array(df1['ultimoPrecio'])
model = sm.OLS(y, X)
res = model.fit()
print(res.summary())

from statsmodels.sandbox.regression.predstd import wls_prediction_std
prstd, iv_l, iv_u = wls_prediction_std(res)
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(x, y, 'o', label="data")
ax.plot(x, res.fittedvalues, 'r--.', label="OLS")
ax.plot(x, iv_u, 'r--')
ax.plot(x, iv_l, 'r--')
ax.legend(loc='best')