Ejemplo n.º 1
0
 def __init__(self, nodo, aristas):
     """Keep the vertex/edge inputs and create the backing graph objects.

     :param nodo: collection of vertices, stored as ``self.vertices``.
     :param aristas: edge data, stored as ``self.aristas``.
     """
     self.vertices, self.aristas = nodo, aristas
     # Undirected graph.
     self.grafo = nx.Graph()
     # Directed multigraph.
     self.gradoD = nx.MultiDiGraph()
     self.objLoadData = LoadData()
    def __init__(self, ori_dataset, max_depth):
        """Set up the tree builder state.

        :param ori_dataset: dataset kept as ``self.dataset`` and handed to
            the ``Inspection`` helper.
        :param max_depth: depth limit kept as ``self.max_depth``.
        """
        self.dataset, self.max_depth = ori_dataset, max_depth
        # Starts empty; filled elsewhere.
        self.col = []

        # Helper objects.
        self.ld = LoadData()
        self.ins = Inspection(ori_dataset)
Ejemplo n.º 3
0
 def load_images(self):
     """
     Build the element used to pull visualization samples.

     :return
         next_element_train: the value returned by ``LoadData.get_data``
         for ``self.train_files`` (originally described as a generator
         that can iterate images and labels).
     """
     return LoadData(self.batch_size, self.sess).get_data(self.train_files)
Ejemplo n.º 4
0
    def Caculate(self,
                 sess,
                 files,
                 accuracy_tensor,
                 Cost_tensor,
                 data_placeholder,
                 labels_placeholder,
                 rate_placeholder,
                 model='train'):
        """
        Calculate mean accuracy and mean loss over all batches of `files`.

        :param sess : session object; its ``run`` method evaluates the tensors.
        :param files (ndarray): files to score.
        :param accuracy_tensor (tensor): accuracy tensor function graph.
        :param Cost_tensor (tensor): cost tensor function graph.
        :param data_placeholder (tensor): data placeholder.
        :param labels_placeholder (tensor): labels placeholder.
        :param rate_placeholder (tensor): rate placeholder; always fed 0 here.
        :param model (string): 'train' shows progress against self.N_train,
            any other value against self.N_test.
        :return acc_mean,loss_mean (float): mean accuracy and mean loss over
            the batches actually scored; (0.0, 0.0) when no batch was read.
        """
        N = self.N_train if model == 'train' else self.N_test
        loader = LoadData(self.batch_size, sess)
        next_element_ = loader.get_data(files)

        acc, loss = 0, 0
        # Number of batches scored so far. The previous version started this
        # counter at 1 and divided by it, so every mean was computed over
        # (batches + 1) and came out too low.
        batches = 0

        while True:
            try:
                images, labels = sess.run(next_element_)
            except tf.errors.OutOfRangeError:
                # Raised once the data has been consumed.
                break

            batches += 1
            print('Score {}/{} \r'.format(batches, N), end='', flush=True)

            acc_, loss_ = sess.run(
                [accuracy_tensor, Cost_tensor],
                feed_dict={
                    data_placeholder: images,
                    labels_placeholder: labels,
                    rate_placeholder: 0
                })
            acc += acc_
            loss += loss_

        if batches == 0:
            # Nothing to average; avoid dividing by zero.
            return 0.0, 0.0

        acc_mean = acc / batches
        loss_mean = loss / batches

        return acc_mean, loss_mean
Ejemplo n.º 5
0
class Inspection():
    """Majority vote, error rate and Gini impurity for a labelled dataset.

    Every row is read through ``row[-1]``, i.e. the label is the last field.
    The Gini computation splits rows into "same label as the first row" and
    "everything else".
    """

    def __init__(self, ori_dataset):
        """
        :param ori_dataset: dataset scored by ``evaluate``.
        """
        self.ori_dataset = ori_dataset
        self.ld = LoadData()

        # Results of the most recent error_rate / gini_impurity call.
        self.er = 0
        self.gi = 0

    # majority vote
    def majority_vote(self, dataset):
        """Return the more frequent of the first two label values.

        ``label`` is what ``LoadData.get_value(dataset, -1)`` returns; only
        ``label[0]`` and ``label[1]`` are used (presumably the distinct label
        values -- confirm against LoadData). A tie between the two counts
        returns ``label[1]``; a dataset without rows returns ``label[0]``.
        """
        label = self.ld.get_value(dataset, -1)
        count1 = sum(1 for row in dataset if row[-1] == label[0])
        count2 = len(dataset) - count1
        if count1 > count2 or count2 == 0:
            return label[0]
        return label[1]

    # error rate
    def error_rate(self, dataset):
        """Return the fraction of rows whose label differs from the majority vote.

        The value is also stored in ``self.er``. An empty dataset yields 0
        instead of dividing by zero, matching ``gini_impurity``.
        """
        if len(dataset) == 0:
            self.er = 0
            return self.er

        label = self.majority_vote(dataset)
        count = sum(1 for row in dataset if row[-1] != label)
        self.er = count / len(dataset)
        return self.er

    # gini impurity
    def gini_impurity(self, dataset):
        """Return 2 * p * (1 - p), p being the share of rows labelled like row 0.

        The value is also stored in ``self.gi``; an empty dataset yields 0.
        """
        total = len(dataset)
        if total == 0:
            self.gi = 0
            return self.gi

        first_label = dataset[0][-1]
        count1 = sum(1 for item in dataset if item[-1] == first_label)
        count2 = total - count1
        # Same value as p1*p2 + p2*p1: doubling a float is exact.
        self.gi = 2 * ((count1 / total) * (count2 / total))
        return self.gi

    # evaluate with error_rate and gini_impurity
    def evaluate(self):
        """Return ``(error_rate, gini_impurity)`` of the original dataset."""
        err_rate = self.error_rate(self.ori_dataset)
        gini_impurity = self.gini_impurity(self.ori_dataset)
        return err_rate, gini_impurity
Ejemplo n.º 6
0
class Grafo:
    """Builds and draws a networkx directed multigraph from vertices and edge data."""

    def __init__(self, nodo, aristas):
        """
        :param nodo: collection of vertices, stored as ``self.vertices``.
        :param aristas: mapping iterated by ``addArista``, stored as ``self.aristas``.
        """
        self.vertices, self.aristas = nodo, aristas
        # Undirected graph.
        self.grafo = nx.Graph()
        # Directed multigraph; this is the graph every method works on.
        self.gradoD = nx.MultiDiGraph()
        self.objLoadData = LoadData()

    def existeNodo(self, nodo):
        """Return True when ``nodo`` is contained in ``self.vertices``."""
        return nodo in self.vertices

    def getGrafo(self):
        """Return the directed multigraph."""
        return self.gradoD

    def addVertex(self, nodo):
        """Placeholder; does nothing."""
        return None

    def addArista(self):
        """Add an edge from every key of ``self.aristas`` to each of its destinations.

        Destinations come from ``uniqueColumn(' airport_arr ', value)`` on the
        LoadData helper. Every key and every edge is printed, followed by the
        edge count and the edge view.
        """
        grafo = self.gradoD
        for base in self.aristas:
            conexiones = self.aristas.get(base)
            print(base)
            destinos = self.objLoadData.uniqueColumn(' airport_arr ', conexiones)
            for destino in destinos:
                grafo.add_edge(base, destino)
                print(base, destino)

        print(len(grafo.edges))
        print(grafo.edges)

    def addNode(self):
        """Add to the graph every vertex that matches one of the base names."""
        nodoBases = (' BASE1 ', ' BASE2 ', 'BASE3 ')
        for candidato in self.vertices:
            if candidato not in nodoBases:
                continue
            self.gradoD.add_node(candidato)

    def drawGraph(self):
        """Populate the graph with all vertices and edges, then plot it with labels."""
        grafo = self.gradoD
        grafo.add_nodes_from(self.vertices)
        self.addArista()

        pos = nx.spring_layout(grafo)
        plt.figure()
        nx.draw(grafo, pos, edge_color='black', width=1, linewidths=2,
                node_size=1500, node_color='purple', alpha=0.7, ax=None)

        # Every node is labelled with itself.
        etiquetas = {n: n for n in grafo.nodes()}
        nx.draw_networkx_labels(grafo, pos, labels=etiquetas,
                                font_size=9, alpha=1,
                                horizontalalignment='center',
                                verticalalignment='center', ax=None)
        plt.show()
    def __init__(self, col_index_used, col_index_touse, dataset, head, left=None, right=None, depth=0):
        """Create a tree node for ``dataset`` and compute its majority-vote result.

        :param col_index_used: index into ``head`` of the attribute this node divides on.
        :param col_index_touse: stored unchanged as ``self.col_index_touse``.
        :param dataset: the dataset divided by the attribute.
        :param head: indexable of attribute names.
        :param left: left child, or None.
        :param right: right child, or None.
        :param depth: current depth of the node.
        """
        # Tree links and position.
        self.left, self.right = left, right
        self.depth = depth
        self.message = ''

        # Column bookkeeping; the attribute name is looked up in ``head``.
        self.col_index_touse = col_index_touse
        self.col_index_used = col_index_used
        self.attribute = head[col_index_used]
        self.dataset = dataset

        # Helpers; the majority vote is computed once per node.
        self.ld = LoadData()
        self.ins = Inspection(self.dataset)
        self.result = self.ins.majority_vote(dataset)
Ejemplo n.º 8
0
class Vertex:
    """Prints the rows stored for a base/airport key in LoadData's connection mapping."""

    def __init__(self):
        self.objloadata = LoadData()

    def recorrerDataFrame(self, dataF):
        """Print every row of ``dataF`` as yielded by ``dataF.iterrows()``."""
        for _, fila in dataF.iterrows():
            print(fila)

    def recorrerDictionary(self, clave):
        """Load the data and print the entry stored under ``clave``.

        Prints the mapping's keys first. When ``clave`` is present, prints the
        type of its value, each of its rows and a separator line; otherwise
        prints a "not found" message.

        :param clave: key to look up in the result of ``getBasesConexiones()``.
        """
        self.objloadata.upLoadData()
        listaAdyancen = self.objloadata.getBasesConexiones()
        print(listaAdyancen.keys())

        if clave not in listaAdyancen:
            print('Base o aeropuerto no existe')
            return

        # No placeholder DataFrame is created up front any more: it was
        # either overwritten here or never used.
        aux = listaAdyancen.get(clave)
        print(type(aux))
        self.recorrerDataFrame(aux)
        print('*******')
Ejemplo n.º 9
0
    def addNode(self):
        """Add to the graph every vertex that matches one of the base names."""
        nodoBases = (' BASE1 ', ' BASE2 ', 'BASE3 ')
        for candidato in self.vertices:
            if candidato not in nodoBases:
                continue
            self.gradoD.add_node(candidato)

    def drawGraph(self):
        """Populate the graph with all vertices and edges, then plot it with labels."""
        grafo = self.gradoD
        grafo.add_nodes_from(self.vertices)
        self.addArista()

        pos = nx.spring_layout(grafo)
        plt.figure()
        nx.draw(grafo, pos, edge_color='black', width=1, linewidths=2,
                node_size=1500, node_color='purple', alpha=0.7, ax=None)

        # Every node is labelled with itself.
        etiquetas = {n: n for n in grafo.nodes()}
        nx.draw_networkx_labels(grafo, pos, labels=etiquetas,
                                font_size=9, alpha=1,
                                horizontalalignment='center',
                                verticalalignment='center', ax=None)
        plt.show()



# Script section: load the data, print the result of getBasesConexiones(),
# then build a Grafo from the airport data plus that same result and draw it.
objload = LoadData()
objload.upLoadData()
print(objload.getBasesConexiones())
objgrafo = Grafo(objload.getDataAirport(),objload.getBasesConexiones())
objgrafo.drawGraph()



Ejemplo n.º 10
0
Archivo: app.py Proyecto: ashu11939/BA
from flask import Flask, request
from datetime import datetime
import re
from loadData import LoadData
from flask import Response
from flask_cors import CORS

# Flask application object, passed to flask_cors' CORS().
app = Flask(__name__)
CORS(app)

# Module-level LoadData instance.
loadData = LoadData()


@app.route("/", methods=['POST', 'GET'])
def home():

    stockStatementURI = 'C:\\Users\\Ashutosh\\workspace\\BusinessAnalytics\\local\\BusinessAnalyticsPy\\fwdlezarstock\\STOCK-3010.csv'
    skipRows = 2
    stockStatement = loadData.loadCsvData(stockStatementURI, skipRows)
    stockStatementDF = loadData.convertCSVToDataFrame(stockStatement)
    product = stockStatementDF['Product Name']

    cost = stockStatementDF['Purchase Price'] * \
        stockStatementDF['Current Stock']

    # Create a new array for X-Axis being the 1st column and Y-axis being the second column
    data = loadData.convertToJsonObject(
        ["products", "cost"],
        [product.to_json(), cost.to_json()])
    product.to_json()
    # data = '{"products":' + product.to_json() + ',"cost":' + cost.to_json() + "}"
Ejemplo n.º 11
0
 def prepareAndReadData(self):
   """
   Build a LoadData loader from ``self.args`` and return what its ``run()`` yields.

   Per the original note: loads the data from the onefile into memory,
   with train/val/test chunks.
   """
   return LoadData(self.args).run()
Ejemplo n.º 12
0
def inicializar(self):
    """Create a LoadData instance; it is neither stored nor returned."""
    # NOTE(review): the instance is discarded -- confirm this is intended.
    cargador = LoadData()
Ejemplo n.º 13
0
import matplotlib.pyplot as plt
import numpy as np

from loadData import LoadData
from decisionTree import Node, DecisionTree, Evaluate
from inspection import Inspection

if __name__ == '__main__':
    train_input = '../handout/education_train.tsv'
    test_input = '../handout/education_test.tsv'
    train_output = '../result/education_train.labels'
    test_output = '../result/education_test.labels'

    ld = LoadData()
    dataset = ld.load_data(train_input)
    dt = DecisionTree(ld)
    tr_err = []
    te_err = []
    x_arr = []
    print(ld.head)
    for i in range(len(ld.head)):
        root = dt.construct(dataset, i)
        # dt.traverse(root)
        dt.classify(ld.load_data(train_input), root, train_output)
        dt.classify(ld.load_data(test_input), root, test_output)
        with open(train_output, 'r') as f:
            predcol = f.read().splitlines()
        realcol = np.loadtxt(train_input,
                             dtype=str,
                             delimiter='\t',
                             skiprows=1)[:, -1]
 def prepareAndReadData(self):
     """
     Build a LoadData loader from ``self.args`` and return what its ``run()`` yields.

     Per the original note: loads the data from the onefile into memory,
     with train/val/test chunks.
     """
     return LoadData(self.args).run()
Ejemplo n.º 15
0
from loadData import LoadData

if __name__ == "__main__":
    ld = LoadData()

    # Populate tables and update attributes after getting all UniProt ids using R.
    ld.getBiogridIDFromUniprot()
    ld.loadProteinDataBankTable()

    # Taken from Protein Atlas; populate after getting Ensembl ids using R.
    ld.importCSV("./origExcel/subcellular_location.csv")
    ld.addProteinAtlasLocalization()

    # Generate the static data for typeahead search: one export per
    # (table, column) pair, in this exact order.
    ld.importCSV("./origExcel/csvMutCollection.csv")
    typeahead_columns = (
        ('Gene', 'HUGO_GENE_SYMBOL'),
        ('Variant', 'MUT_HGVS_NT_ID'),
        ('VariantProperty', 'CLINVAR_CLINICAL_SIGNIFICANCE'),
        ('VariantProperty', 'PFAM_PRESENT'),
        ('VariantProperty', 'IN_PFAM'),
        ('VariantProperty', 'IN_MOTIF'),
        ('Variant', 'CCSB_MUTATION_ID'),
        ('Variant', 'DBSNP_ID'),
        ('Variant', 'CHR_COORDINATE_HG19'),
        ('Variant', 'MUT_HGVS_AA_ID'),
        ('VariantProperty', 'HGMD_VARIANT_CLASS'),
        ('Gene', 'OMIM_ID'),
        ('Gene', 'ENTREZ_GENE_ID'),
        ('Gene', 'UNIPROT_SWISSPROT_ID'),
        ('Gene', 'CHROMOSOME_NAME'),
    )
    for table_name, column_name in typeahead_columns:
        ld.exportOneColtoJSON(table_name, column_name)
nameSim = 'validation.mat'

flagSave = True
nameSavedFile = 'linValidation'
format = 'eps'

# Retrieve data for the 3 buildings: one {zone label: variable name} mapping
# and one dataframe per building.
nameBui = ['old', 'reno', 'renoLight']
leg = ['Original', 'Renovated', 'Light weight']
nZones = 6
listDic = [
    {'Zone%d' % zone: '%s.error[%d]' % (bui, zone)
     for zone in range(1, nZones + 1)}
    for bui in nameBui
]
listDf = [LoadData(path + nameSim, dic).df for dic in listDic]

# Make one boxplot per dataframe, side by side.
f, axarr = plt.subplots(1, len(nameBui))
for ax, frame, title in zip(axarr, listDf, leg):
    frame.boxplot(ax=ax)
    ax.set_title(title)
    ax.set_ylim(-1, 1)
    # Rotate the x tick labels.
    for tick in ax.get_xticklabels():
        tick.set_rotation(45)
# Only the first subplot carries the y-axis label.
axarr[0].set_ylabel('[K]')

if flagSave:
    f.savefig(nameSavedFile + "." + format, format=format, bbox_inches='tight')
class DecisionTree():
    """Binary decision tree built by recursively splitting on the best Gini gain.

    Relies on ``LoadData.get_value`` to obtain the values of a column, on
    ``Inspection.gini_impurity`` for the impurity of a dataset, and on ``Node``
    for the tree nodes; all three are defined outside this class.
    """

    def __init__(self, ori_dataset, max_depth):
        """
        :param ori_dataset: dataset stored as ``self.dataset`` and handed to Inspection.
        :param max_depth: construction stops once ``depth`` exceeds this value.
        """
        # Column indexes already used on the path currently being built.
        self.col = []

        self.max_depth = max_depth
        self.dataset = ori_dataset

        self.ld = LoadData()
        self.ins = Inspection(ori_dataset)

    # divide the dataset with certain attribute
    def divide_dataset(self, dataset, col_index):
        """Split ``dataset`` in two on column ``col_index``.

        :return: ``(dataset0, dataset1)`` as numpy arrays. ``dataset0`` holds the
            rows whose value equals the first entry returned by
            ``get_value(dataset, col_index)``; ``dataset1`` holds all other rows.
        """
        label = self.ld.get_value(dataset, col_index)
        dataset0 = []
        dataset1 = []
        for row in dataset:
            if row[col_index] == label[0]:
                dataset0.append(row)
            else:
                dataset1.append(row)

        return np.array(dataset0), np.array(dataset1)

    # calculate the gini impurity given attribute
    def gini_impurity(self, dataset, col_index=-1):
        """Return the Gini impurity of ``dataset``.

        With the default ``col_index=-1`` this is the impurity of the dataset
        itself. Otherwise it is the size-weighted impurity of the two parts
        produced by splitting on ``col_index``.
        """
        if col_index == -1:
            return self.ins.gini_impurity(dataset)

        left, right = self.divide_dataset(dataset, col_index)
        total = len(dataset)
        gi_left = self.ins.gini_impurity(left)
        gi_right = self.ins.gini_impurity(right)
        return (len(left) / total) * gi_left + (len(right) / total) * gi_right

    # calculate the gini gain given attribute
    def gini_gain(self, dataset, col_index):
        """Return the impurity reduction obtained by splitting on ``col_index``."""
        ori_gi = self.gini_impurity(dataset)
        new_gi = self.gini_impurity(dataset, col_index)
        return ori_gi - new_gi

    def get_attribute(self, dataset, used_col):
        """Return the unused column index with the highest Gini gain.

        The last column (the label) is never a candidate.
        """
        # Same set expression as before so that candidates are visited in the
        # same order and ties in max() resolve identically.
        candidates = set(range(len(dataset[0]) - 1)).difference(set(used_col))
        gg_arr = {item: self.gini_gain(dataset, item) for item in candidates}
        return max(gg_arr, key=gg_arr.get)

    # Record the path taken so far (self.col), then keep splitting.
    def construct(self, dataset, col_index=-1, depth=0):
        """Recursively build the subtree for ``dataset``.

        :param dataset: rows to split.
        :param col_index: accepted for the recursive calls; the column actually
            used is always recomputed with ``get_attribute``.
        :param depth: depth of the node to create.
        :return: the new ``Node``, or None when a stop condition is met (depth
            limit exceeded, empty dataset, no unused attribute left, or the
            impurity is already 0).
        """
        # reach the max depth
        if depth > self.max_depth:
            print('depth reach max depth')
            return None

        # after divide the dataset is empty
        if len(dataset) == 0:
            print('dataset is empty.')
            return None

        # No more attribute to divide
        if len(dataset[0]) == len(self.col) + 1:
            print('all the attributes have been used.')
            return None

        # after divide the gini-impurity of dataset is 0
        if self.gini_impurity(dataset) == 0:
            print('no need to do more division!')
            return None

        # construct the current node on the attribute with max gini-gain
        col_index = self.get_attribute(dataset, self.col)
        node = Node(col_index, dataset, depth=depth)
        self.col.append(col_index)

        # recursively construct the left and right branches
        new_dataset = self.divide_dataset(dataset, col_index)
        depth += 1
        node.left = self.construct(new_dataset[0], col_index, depth)
        node.right = self.construct(new_dataset[1], col_index, depth)

        # The column becomes available again for sibling paths.
        self.col.remove(col_index)

        return node

    def traverse(self, node):
        """Print ``depth`` and ``attribute`` of every node in pre-order."""
        # Single definition: the earlier duplicate carried pasted lines that
        # referenced names undefined in this method.
        if node:
            print(node.depth, '\t', node.attribute)
            self.traverse(node.left)
            self.traverse(node.right)


if __name__ == '__main__':
    # Small demo: load the sample data, exercise the split and gain helpers,
    # then build the tree and print it.
    ld = LoadData()
    dataset = ld.load_data('../handout/small_train.tsv')

    dt = DecisionTree(dataset, 0)
    ds = dt.divide_dataset(dataset, 1)
    giga = dt.gini_gain(dataset, 1)

    root = dt.construct(dataset)
    dt.traverse(root)
    # print(ds[1])
Ejemplo n.º 19
0
 def __init__(self):
     """Attach a fresh LoadData instance as ``self.objloadata``."""
     self.objloadata = LoadData()
Ejemplo n.º 20
0
    def fit(self, lr, epochs, drop_rate):
        """
        Build the graph, train the model and save a checkpoint.

        Each epoch runs the optimizer over every training batch, then scores
        the train and test files with ``self.Caculate`` and prints the result.
        Training stops early once the train accuracy reaches 0.980.

        :param lr (float): learning rate passed to the RMSProp optimizer.
        :param epochs (int): maximum number of epochs.
        :param drop_rate (float): dropout rate fed to the `rate` placeholder
            while training. rate = 1 - keep_prob.
        :return: None. Sets ``self.N_train`` / ``self.N_test`` and saves the
            session to 'model/alexNet'.
        """

        # Placeholders: images of shape [None, 227, 227, 3], labels of shape
        # [None, 1], and a scalar-like dropout rate.
        data = tf.placeholder(tf.float32, [None, 227, 227, 3], name='Input')
        labels = tf.placeholder(tf.float32, [None, 1], name='Labels')
        rate = tf.placeholder(tf.float32, name='rate')

        # Build the model; `out` is used as logits by both the loss and the score.
        params = self.init_parameters()
        out = self.forward(data, params, rate)
        Cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=out, labels=labels))
        optimizer = tf.train.RMSPropOptimizer(learning_rate=lr).minimize(Cost)
        # Score: round the sigmoid of the logits and compare with the labels.
        predict = tf.round(tf.sigmoid(out))
        equal = tf.equal(labels, predict)
        correct = tf.cast(equal, tf.float32)
        accuracy = tf.reduce_mean(correct)

        # Split the data files into train and test sets.
        split_data = SplitData(self.file_dir,
                               Load_samples=self.Load_samples,
                               test_rate=self.test_rate)
        train_files, test_files = split_data()

        # Number of full batches per set; used for the progress output.
        self.N_train = len(train_files) // self.batch_size
        self.N_test = len(test_files) // self.batch_size

        # Saver, plus the logits tensor registered under 'pre_network'.
        saver = tf.train.Saver()
        tf.add_to_collection('pre_network', out)
        init = tf.global_variables_initializer()

        # Training.
        with tf.Session() as sess:
            sess.run(init)

            for epoch in range(epochs):
                # A new loader / next-element is created for every epoch.
                loader = LoadData(self.batch_size, sess)
                next_element_train = loader.get_data(train_files)
                # Run over the whole training set; `count` is the 1-based
                # batch number shown in the progress line.
                count = 1
                while 1:
                    try:
                        images, target = sess.run(next_element_train)
                        print('Training {}/{} \r'.format(count, self.N_train),
                              end='',
                              flush=True)
                        count += 1
                    except tf.errors.OutOfRangeError:
                        # Raised when the data is exhausted: epoch finished.
                        break
                    else:
                        _ = sess.run(optimizer,
                                     feed_dict={
                                         data: images,
                                         labels: target,
                                         rate: drop_rate
                                     })

                # Score both sets after the epoch.
                acc_train, loss_train = self.Caculate(sess, train_files,
                                                      accuracy, Cost, data,
                                                      labels, rate, 'train')
                acc_test, loss_test = self.Caculate(sess, test_files, accuracy,
                                                    Cost, data, labels, rate,
                                                    'test')
                print(
                    '[{}/{}] train loss:{:.4f} - train acc:{:.4f} - test loss:{:.4f} - test acc:{:.4f}'
                    .format(epoch + 1, epochs, loss_train, acc_train,
                            loss_test, acc_test))
                # Early stop once the training accuracy is high enough.
                if acc_train >= 0.980:
                    break
            # Save the session after the last epoch (or the early stop).
            saver.save(sess, 'model/alexNet')
Ejemplo n.º 21
0
            self.gi=0
        else:
            count1 = 0
            for item in dataset:
                if item[-1]==dataset[0][-1]:
                    count1+=1
            count2 = len(dataset)-count1
            self.gi = (count1/len(dataset))*(count2/len(dataset))+(count2/len(dataset))*(count1/len(dataset))
        return self.gi

    # evaluate with error_rate and gini_impurity
    def evaluate(self):
        """Return ``(error_rate, gini_impurity)`` computed on ``self.ori_dataset``."""
        data = self.ori_dataset
        # Tuple elements are evaluated left to right: error rate first.
        return self.error_rate(data), self.gini_impurity(data)


if __name__ == '__main__':
    # Usage: <script> <infile> <outfile>
    infile, outfile = sys.argv[1], sys.argv[2]

    ld = LoadData()
    ori_dataset = ld.load_data(infile)
    ins = Inspection(ori_dataset)
    err_rate, gini_impurity = ins.evaluate()

    # Write both metrics, Gini impurity first.
    with open(outfile, 'w') as f:
        f.write("gini_impurity: {}\n".format(gini_impurity))
        f.write("error: {}\n".format(err_rate))
Ejemplo n.º 22
0
    # 'webpack': 'https://github.com/qq20004604/webpack-study',
    # 'react': 'https://github.com/qq20004604/react-demo',
    # 'vue': 'github: https://github.com/qq20004604/vue-scaffold\n博客专栏(1.x):https://blog.csdn.net/qq20004604/article/category/6381182',
    # '笔记': 'https://github.com/qq20004604/notes',
    # 'demo': 'https://github.com/qq20004604/some_demo',
    # '海外服务器': 'https://manage.hostdare.com/aff.php?aff=939\n这个可以做私人服务器(不需要备案),也可以找群主询问如何架设SS server的方法。',
    # 'QQ 机器人': 'https://github.com/qq20004604/qq-robot',
    # '架构': 'https://juejin.im/post/5cea1f705188250640005472',
    # 'es6': 'https://blog.csdn.net/qq20004604/article/details/78014684',
    # 'vue脚手架': 'https://github.com/qq20004604/Vue-with-webpack',
    # 'react脚手架': 'https://github.com/qq20004604/react-with-webpack',
    # 'Macbook常用软件': 'https://github.com/qq20004604/when-you-get-new-Macbook',
    # 'python的django与mysql交互': 'https://blog.csdn.net/qq20004604/article/details/89934212'
}

# Module-level LoadData instance.
ld = LoadData()


def log(context, filename='./log.log'):
    """Append one line describing the message ``context`` to ``filename``.

    :param context: mapping with the keys 'sender' (itself holding
        'nickname' and 'user_id'), 'message_type' and 'raw_message'.
    :param filename: log file, opened in append mode as UTF-8.
    """
    with open(filename, 'a', encoding='utf-8') as f:
        sender = context['sender']
        fields = (
            datetime.now(),
            sender['nickname'],
            context['message_type'],
            sender['user_id'],
            context['raw_message'],
        )
        line = 'time:%s, sender:%s, message_type:%s, user_id:%s, content:%s\n' % fields
        f.write(line)


@bot.on_message()
async def handle_msg(context):
    msg = context['message']
    # print(msg)
Ejemplo n.º 23
0
    def __init__(self, ori_dataset):
        """Keep the dataset, create the LoadData helper and zero both metrics.

        :param ori_dataset: stored as ``self.ori_dataset``.
        """
        self.ori_dataset = ori_dataset
        self.ld = LoadData()

        # Both metrics start at 0.
        self.er, self.gi = 0, 0
Ejemplo n.º 24
0
def hormigas():
    """Print the result of ``LoadData().getBasesConexiones()`` and then call ``viajar()``."""
    print(LoadData().getBasesConexiones())
    viajar()