Example #1
    def on_execute(self):
        """Executes an algorithm several times"""

        while self.runs < self.maxRuns:
            # continue running until algorithm is done
            while self.algorithm.isDone is False:
                self.algorithm.execute()
                self.dataHelper.writeCSVLine(self.area)

            # if algorithm completes one run, reset state
            if self.algorithm.isDone is True and self.runs < self.maxRuns:
                print('Run {} is complete! 🎉'.format(self.runs))
                # save area to csv
                self.dataHelper.writeArea(self.area)
                self.dataHelper = DataHelper()

                # reset area and algorithm
                self.area = copy.deepcopy(self.originalArea)
                self.algorithm = copy.copy(self.originalAlgorithm)
                self.algorithm.area = self.area
                self.runs += 1

            if self.runs == self.maxRuns:
                print('✨✨ I successfully ran {} '
                      'times! ✨✨'.format(self.runs))
Example #2
    def __init__(self, logging, graph=None, file=None, start=0, ints=False):
        self.logging = logging
        if not os.path.isdir(logging): os.mkdir(logging)

        if graph is None and file is not None:
            self.helper = DataHelper(file, NP=False)
            self.samples = self.helper.GetSamples()
            if ints:
                samples = []
                for i in self.samples:
                    samples.append([int(i[0]), int(i[1]), int(i[2])])
                self.samples = np.array(samples)
            else:
                self.samples = np.array(self.samples)
            self.G = self.readGraph(file, ints=ints)
            self.uG = self.readGraph(file, ints=ints, unweight=True)

        elif graph is not None and file is None:
            self.G = graph
            self.uG = nx.Graph(self.G)

            self.samples = []
            for edge in self.G.edges():
                for i in self.G[edge[0]][edge[1]]:
                    self.samples.append([
                        edge[0], self.G[edge[0]][edge[1]][i]['attr'], edge[1]
                    ])

        else:
            raise Exception

        self.start = start
        self.node2id, self.id2node = self._node2id()
        self.edge2id, self.id2edge = self._edge2id()
Example #3
 def __init__(self, checkpoint_path, model_selector, cuda=False):
     #cuda = True
     self.dh = DataHelper()  # database reader
     self.checkpoint_path = checkpoint_path
     self.model_selector = model_selector
     self.cuda = cuda
     l.info("[I] Model Loading....")
     self.compute_app = Application(checkpoint_path,
                                    cuda=cuda,
                                    model_name=model_selector)
     l.info("[I] Model loaded...")
Example #4
    def __init__(self, area, algorithm):
        """Initiate all elements necessary to run an algorithm
        without visualization

        Keyword arguments:
        area      -- the area that should be used in the algorithm
        algorithm -- the algorithm by which the given area is filled
        """

        self.area = area
        self.algorithm = algorithm
        self.dataHelper = DataHelper()
Example #5
    def train(self, train_file, iteration):
        print(
            "________________________________________________________________________________________________tarin starts"
        )
        model = multi_class_perceptron()
        c = DataHelper()

        instances = []
        sentence_count = 0
        for sentence in c.read_sentence(train_file):
            sentence_count += 1
            for token in sentence:
                feature = token.feature_extracter(model.feature_constructor)
                # print feature
                instances.append(
                    (feature, model.pos_constructor(token.gold_pos)))

        weights_statistics = model.weights_constructor()

        self.table_statistics.append([
            str(weights_statistics[1]),
            str(weights_statistics[0]),
            str(len(instances)),
            str(sentence_count)
        ])

        table = AsciiTable(self.table_statistics)
        print(table.table)

        for iter_round in range(iteration):
            start = time.time()
            for (feature, pos_tag) in instances:
                #print (feature, pos_tag)
                score = model.weight_scores(feature)
                #print score
                predicted_tag = model.predict(score)
                #print predicted_tag
                if predicted_tag != pos_tag:
                    model.update(feature, pos_tag, predicted_tag)
            end = time.time()
            print 'Iteration' + '\t' + str(
                iter_round +
                1) + '\t' + 'done.', " runs at:", end - start, "seconds"
            model_file = 'dumps\\model_' + str(iter_round + 1) + '.dump'
            model.save(model_file)
        print(
            "________________________________________________________________________________________________tarin ends"
        )
Example #6
    def __init__(self, area, algorithm, runs):
        """Initiate all elements necessary to run an algorithm consecutively
        without visualizations.

        Keyword arguments:
        area      -- the area that should be visualised
        algorithm -- the algorithm by which the given area is filled
        runs      -- the amount of times the algorithm should run
        """
        self.area = area
        self.algorithm = algorithm
        self.originalArea = copy.deepcopy(area)
        self.originalAlgorithm = copy.copy(algorithm)
        self.runs = 0
        self.dataHelper = DataHelper()
        self.maxRuns = runs
Example #7
class BulkVisualizer(Visualizer):
    """Draws consecutive visualisations for consecuetively created areas"""
    def __init__(self, area, algorithm, runs):
        """Initiate all elements necessary to run an algorithm consecutively
        and create consecutive visualizations for them.

        Keyword arguments:
        area      -- the area that should be visualised
        algorithm -- the algorithm by which the given area is filled
        runs      -- the amount of times the algorithm should be run and
                     the amount of visualizations to be made
        """

        super().__init__(area, algorithm)
        self.originalArea = copy.deepcopy(area)
        self.originalAlgorithm = copy.copy(algorithm)
        self.runs = 0
        self.allTimeHigh = 0
        self.dataHelper = DataHelper()
        self.maxRuns = runs

    def on_render(self):
        """Runs and visualizes the algorithm"""

        while self.runs < self.maxRuns:
            # continue running until algorithm is done
            super().on_render()

            # track highest found area value
            if self.area.price > self.allTimeHigh:
                self.allTimeHigh = self.area.price

            # when a run is finished...
            if self.algorithm.isDone is True and self.runs < self.maxRuns:
                print('🎉🎉Run {} is complete! 🎉🎉'.format(self.runs))
                # ...save values to csv file
                self.dataHelper.writeArea(self.area)

                # ...restore to a fresh state (empty area)
                self.area = copy.deepcopy(self.originalArea)
                self.algorithm = copy.copy(self.originalAlgorithm)
                self.algorithm.area = self.area
                self.runs += 1

            if self.runs == self.maxRuns:
                print('✨✨ I successfully ran {} times! ✨✨'.format(
                    self.runs))
Example #8
    def __init__(self, area, algorithm, runs):
        """Initiate all elements necessary to run an algorithm consecutively
        and create consecutive visualizations for them.

        Keyword arguments:
        area      -- the area that should be visualised
        algorithm -- the algorithm by which the given area is filled
        runs      -- the amount of times the algorithm should be run and
                     the amount of visualizations to be made
        """

        super().__init__(area, algorithm)
        self.originalArea = copy.deepcopy(area)
        self.originalAlgorithm = copy.copy(algorithm)
        self.runs = 0
        self.allTimeHigh = 0
        self.dataHelper = DataHelper()
        self.maxRuns = runs
Example #9
class NoDrawBulkVisualizer:
    """Runs an algorithm several times without visualization"""
    def __init__(self, area, algorithm, runs):
        """Initiate all elements necessary to run an algorithm consecutively
        without visualizations.

        Keyword arguments:
        area      -- the area that should be visualised
        algorithm -- the algorithm by which the given area is filled
        runs      -- the amount of times the algorithm should run
        """
        self.area = area
        self.algorithm = algorithm
        self.originalArea = copy.deepcopy(area)
        self.originalAlgorithm = copy.copy(algorithm)
        self.runs = 0
        self.dataHelper = DataHelper()
        self.maxRuns = runs

    def on_execute(self):
        """Executes an algorithm several times"""

        while self.runs < self.maxRuns:
            # continue running until algorithm is done
            while self.algorithm.isDone is False:
                self.algorithm.execute()
                self.dataHelper.writeCSVLine(self.area)

            # if algorithm completes one run, reset state
            if self.algorithm.isDone is True and self.runs < self.maxRuns:
                print('Run {} is complete! 🎉'.format(self.runs))
                # save area to csv
                self.dataHelper.writeArea(self.area)
                self.dataHelper = DataHelper()

                # reset area and algorithm
                self.area = copy.deepcopy(self.originalArea)
                self.algorithm = copy.copy(self.originalAlgorithm)
                self.algorithm.area = self.area
                self.runs += 1

            if self.runs == self.maxRuns:
                print('✨✨ I successfully ran {} '
                      'times! ✨✨'.format(self.runs))
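A minimal usage sketch for the class above, assuming the Area, RandomAlgorithm and DataHelper classes from the other examples in this listing are importable; the constructor arguments are placeholder values taken from the menu defaults in Example #15:

# Hypothetical driver: build an empty area, pick an algorithm, run it ten times.
area = Area()
# 12/5/3 house counts, random placement order, one water area, empty grid
algorithm = RandomAlgorithm(area, 12, 5, 3, 1, 1, True)
visualizer = NoDrawBulkVisualizer(area, algorithm, runs=10)
visualizer.on_execute()  # one CSV line per step, one saved area per completed run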
Example #10
class Model_Performance():
    '''
    Evaluate a trained model, e.g. by computing ROC statistics over
    cross-architecture function pairs.
    '''
    def __init__(self, checkpoint_path, model_name):
        self.chkpoint = checkpoint_path
        self.app = Application(load_path=checkpoint_path,
                               model_name=model_name)
        self.datahelper = DataHelper()

    def CrossArchTestFast(self, arch1, arch2):
        '''
        :param arch1: path to sqlite database
        :param arch2: path to sqlite database
        :return:
        '''
        pairs = self.datahelper.get_cross_archtecture_pair(arch1, arch2)

        result = []
        lables = []
        pool = mp.Pool(processes=10)
        start_time = datetime.now()
        pairs = pairs[0:10]
        for (source, target, label) in tqdm(pairs, desc="Sim Computing."):
            source_tree = source[-1]
            target_tree = target[-1]
            lables.append(label)
            result.append(
                pool.apply_async(self.app.similarity_tree, (
                    source_tree,
                    target_tree,
                )))
        pool.close()
        pool.join()
        predictions = []
        for res in result:
            predictions.append(res.get())
        time_cost = (datetime.now() - start_time).seconds
        fpr, tpr, thresholds = roc_curve(np.array(lables),
                                         np.array(predictions))
        logger.info("===> architecture info : %s vs %s" % (arch1, arch2))
        logger.info("===> time: %s" % (datetime.now()))
        logger.info("model: %s" % self.chkpoint)
        logger.info(
            "compute %d function pairs, which cost %d seconds; %f pairs per sec"
            % (len(pairs), time_cost, len(pairs) * 1.0 / time_cost))
        logger.info("predictions= %s" % json.dumps(predictions))
        logger.info("labels= %s" % json.dumps(lables))
        logger.info("fpr= %s" % json.dumps(fpr.tolist()))
        logger.info("tpr= %s" % json.dumps(tpr.tolist()))
        logger.info("thresholds= %s" % json.dumps(thresholds.tolist()))
Example #11
class NoDrawVisualizer:
    """Runs an algorithm without visualization"""
    def __init__(self, area, algorithm):
        """Initiate all elements necessary to run an algorithm
        without visualization

        Keyword arguments:
        area      -- the area that should be used in the algorithm
        algorithm -- the algorithm by which the given area is filled
        """

        self.area = area
        self.algorithm = algorithm
        self.dataHelper = DataHelper()

    def on_execute(self):
        """Starts and executes the algorithm"""

        # while algorithm is not done, run it
        while self.algorithm.isDone is False:
            self.algorithm.execute()

            # save area state between every step
            self.dataHelper.writeArea(self.area)
Example #12
    def viterbi_tagger(self, test_file):
        print "________________________________________________________________________________________________viterbi_tagger starts"
        c_3 = DataHelper("dataset\\train.col")

        stream_emission_matrix = gzip.open("dumps\\emission_matrix.dump", 'rb')
        emission_matrix = cPickle.load(stream_emission_matrix)
        stream_emission_matrix.close()

        stream_transition_matrix = gzip.open("dumps\\transition_matrix.dump",
                                             'rb')
        transition_matrix = cPickle.load(stream_transition_matrix)
        stream_transition_matrix.close()

        for x in transition_matrix:
            for p in transition_matrix[x]:
                if transition_matrix[x][p] > 0.2:
                    print p, x, transition_matrix[x][p]

        sentence_count = 0
        word_count = 0

        output = open('dumps\\dev-predicted-viterbi.col', 'w')
        for sentence in c_3.read_sentence(test_file):
            observation = sentence.word_list()
            sentence_count += 1
            word_count += len(observation)
            #print observation
            states = sentence.tag_list()
            #print states
            #for word in observation:
            #   if word in emission_matrix:
            #        states = states + emission_matrix[word].keys()
            #    else:
            #        states = states + ['NN']

            states = list(set(states))
            #states.insert(0, '<S>')

            #start = time.time()
            prediction = self.viterbi_smoothing(observation, states,
                                                emission_matrix,
                                                transition_matrix)
            #end = time.time()
            #print 'Sentence '+str(sentence_count)+' at', end - start

            for i in range(len(prediction[0])):
                output.write('%s\t%s\n' % (prediction[1][i], prediction[0][i]))
            output.write('\n')

        output.close()

        Ctag = DataHelper("dataset\\test.col")
        TagSet = Ctag.tagSet(Ctag)

        self.table_statistics.append([
            str(emission_matrix.__len__()),
            str(len(TagSet)),
            str(word_count),
            str(sentence_count)
        ])
        table = AsciiTable(self.table_statistics)
        print(table.table)
        print "________________________________________________________________________________________________viterbi_tagger ends"

        Cgold = DataHelper("dataset\\test.col")
        GoldWordTagList = Cgold.Tokenize(Cgold)

        Cpred = DataHelper("dumps\\dev-predicted-viterbi.col")
        PredWordTagList = Cpred.Tokenize(Cpred)

        eval = Evaluation()
        per_tag = False
        f_measure = eval.Evaluate(per_tag, GoldWordTagList, PredWordTagList,
                                  TagSet)

        print 'F-Measure Micro:' + '\t' + f_measure[0]
        print 'F-Measure Macro:' + '\t' + f_measure[1]
        print

        final_eval = Evaluation()
        f_per_tag = True
        per_tag_table = final_eval.Evaluate(f_per_tag, GoldWordTagList,
                                            PredWordTagList, TagSet)
        print per_tag_table
Example #13
    def tagger(self, filename, iteration):
        print "________________________________________________________________________________________________Perceptron tagger starts"
        for iter_round in range(iteration):
            model_file = 'dumps\\model_' + str(iter_round + 1) + '.dump'
            print 'Reading from file' + '\t' + model_file.split('\\')[1]
            model = multi_class_perceptron(model_file)
            c = DataHelper()

            output = open('dumps\\dev-predicted.col', 'w')
            for sentence in c.read_sentence(filename):
                for token in sentence:
                    feature = token.feature_extracter(model.return_features)
                    score = model.weight_scores(feature)
                    predicted_tag = model.predict(score)
                    pos_tag = model.pos_constructor(token.gold_pos)
                    output.write(
                        '%s\t%s\n' %
                        (token.word, model.return_pos_reverse(predicted_tag)))

                output.write('\n')
            output.close()

            Cgold = DataHelper("dataset\\test.col")
            GoldWordTagList = Cgold.Tokenize(Cgold)

            Cpred = DataHelper("dumps\\dev-predicted.col")
            PredWordTagList = Cpred.Tokenize(Cpred)

            Ctag = DataHelper("dataset\\test.col")
            TagSet = Ctag.tagSet(Ctag)

            eval = Evaluation()
            per_tag = False
            f_measure = eval.Evaluate(per_tag, GoldWordTagList,
                                      PredWordTagList, TagSet)

            print 'F-Measure Micro:' + '\t' + f_measure[0]
            print 'F-Measure Macro:' + '\t' + f_measure[1]
            print
        final_eval = Evaluation()
        f_per_tag = True
        per_tag_table = final_eval.Evaluate(f_per_tag, GoldWordTagList,
                                            PredWordTagList, TagSet)
        print per_tag_table

        print "________________________________________________________________________________________________Perceptron tagger ends"
Example #14
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from network import SimpleNet, SimpleRNN
from datahelper import DataHelper

x = SimpleNet.x
cnn = SimpleNet.build_cnn(x, reuse=None, raw_out=True)

rnn = SimpleRNN.build_graph(cnn)
infer = tf.nn.softmax(rnn)

y = SimpleRNN.y

dh = DataHelper()

saver = tf.train.Saver()
#print('Recovering')
#saver.recover_last_checkpoints('../models')

with tf.Session() as sess:
    print('Restoring')
    saver.restore(sess, '../models/cnn-rnn-100.ckpt')

    while True:
        img, lbl = dh.get_next_example(split_channels=True)

        _y = sess.run(infer, feed_dict={x:img})
        print(f'Label:{lbl}\nGuess:{_y}')
Example #15
def main():
    """Present the user with choices about the algorithm to run"""

    # set some values
    placementOrder = None
    waterAmountChoice = None

    print("----------------------")
    print("WELCOME TO AMSTELHAEGE \n")

    # let the user choose a new or existing grid
    gridChoice = int(
        input('Do you want to load in a grid '
              'or start from scratch?\n'
              '1: Load a grid\n'
              '2: Start from scratch\n'
              'Your choice: '))
    print("")

    # let the user choose an algorithm
    algorithmChoice = int(
        input('What algorithm do you want to run?\n'
              '1: Random \n'
              '2: Greedy \n'
              '3: SpeedRandom\n'
              '4: HillClimbing\n'
              '5: Simulated Annealing\n'
              '6: Do Nothing (show only)\n'
              'Your choice: '))
    if algorithmChoice == 4 or algorithmChoice == 5:
        print("")
        totalIterations = int(
            input('How many steps should algorithm make?\n'
                  'Your choice: '))
        if algorithmChoice == 5:
            print("")
            typeOfSimulatedAnnealing = int(
                input('What type of Simulated'
                      ' Annealing?\n'
                      '1: linear \n'
                      '2: exponential\n'
                      '3: sigmoidal\n'
                      'Your choice: '))
        if algorithmChoice == 5:
            print("")
            beginTemp = int(
                input('What is the begin temperature? (Try "50")\n'
                      'Your choice: '))
            print("")
            endTemp = int(
                input('What is the end temperature? (Try "0")\n'
                      'Your choice: '))
            print("")
            correctionShortening = int(
                input('What correction factor for'
                      ' shortening would you like'
                      ' to use? (Try "1000")\n'
                      ' Your choice: '))
    print("")

    # let the user choose a type of visualization.
    # Normal visualizer renders a single visualisation
    # Bulk visualizer can render several maps after one another
    visualizerChoice = int(
        input('What visualizer do you want?\n'
              '1: Normal visualizer\n'
              '2: Bulk visualizer\n'
              '3: No-draw normal visualizer\n'
              '4: No-draw bulk visualizer\n'
              'Your choice: '))
    print("")
    isEmpty = True
    fhAmount = 0
    bAmount = 0
    mAmount = 0

    if gridChoice == 1:
        # ask the user for the file where the existing grid is stored
        fileName = str(input('Please provide a file name: \n' 'Your choice: '))
        area = DataHelper(fileName).getArea()
        isEmpty = False
    else:
        # or create a new grid
        area = Area()
        houseAmountChoice = int(
            input('How many houses do you want? \n'
                  '1: 20\n'
                  '2: 40\n'
                  '3: 60\n'
                  'Your choice: '))
        print("")
        # set the correct ratio of house types
        # for different amounts of houses
        if houseAmountChoice == 1:
            fhAmount = 12
            bAmount = 5
            mAmount = 3
        elif houseAmountChoice == 2:
            fhAmount = 24
            bAmount = 10
            mAmount = 6
        elif houseAmountChoice == 3:
            fhAmount = 36
            bAmount = 15
            mAmount = 9

    # if applicable for the algorithm chosen, provide further choices
    if algorithmChoice != 2 and gridChoice != 1:
        placementOrder = int(
            input('In what order do you want houses '
                  'to be placed on the map?\n'
                  '1: Random \n'
                  '2: First Mansions, then Bungalows, '
                  'then Family homes \n'
                  'Your choice: '))
        print("")
        waterAmountChoice = int(
            input('How many water areas'
                  ' do you want on the map? \n'
                  '1: 1 Area \n'
                  '2: 2 Area\'s \n'
                  '3: 3 Area\'s \n'
                  '4: 4 Area\'s \n'
                  '5: Random amount of Area\'s \n'
                  'Your choice: '))
        if waterAmountChoice == 5:
            waterAmountChoice = "Random"
        print("")

    # initiate the algorithm chosen by the user
    if algorithmChoice == 1:
        algorithm = RandomAlgorithm(area, fhAmount, bAmount, mAmount,
                                    placementOrder, waterAmountChoice, isEmpty)
    elif algorithmChoice == 2:
        algorithm = GreedyAlgorithm(area, fhAmount, bAmount, mAmount, isEmpty)
    elif algorithmChoice == 3:
        algorithm = SpeedRandomAlgorithm(area, fhAmount, bAmount, mAmount,
                                         placementOrder, waterAmountChoice,
                                         isEmpty)
    elif algorithmChoice == 4:
        algorithm = HillClimbingAlgorithm(area, fhAmount, bAmount, mAmount,
                                          placementOrder, waterAmountChoice,
                                          isEmpty, totalIterations)
    elif algorithmChoice == 5:
        algorithm = HillClimbingAlgorithm(area, fhAmount, bAmount, mAmount,
                                          placementOrder, waterAmountChoice,
                                          isEmpty, totalIterations, beginTemp,
                                          endTemp, typeOfSimulatedAnnealing,
                                          correctionShortening)

    elif algorithmChoice == 6:
        algorithm = Algorithm(area, fhAmount, bAmount, mAmount, isEmpty)

    # initiate the visualization requested by the user
    if visualizerChoice == 1:
        # enable downward graphing
        if algorithmChoice == 5:
            visualizer = Visualizer(area, algorithm, True)
        else:
            visualizer = Visualizer(area, algorithm, False)
    elif visualizerChoice == 2:
        runs = int(input('How many runs do you want to do? \n'
                         'Your choice: '))
        visualizer = BulkVisualizer(area, algorithm, runs)
    elif visualizerChoice == 3:
        visualizer = NoDrawVisualizer(area, algorithm)
    elif visualizerChoice == 4:
        runs = int(input('How many runs do you want to do? \n'
                         'Your choice: '))
        visualizer = NoDrawBulkVisualizer(area, algorithm, runs)

    # notify the user of the end of the menu
    print("Starting your Algorithm...")
    print("----------------------")
    visualizer.on_execute()
Example #16
    parser = argparse.ArgumentParser()
    parser.add_argument("--more_news_times",
                        type=int,
                        default=3,
                        help="Number of click the 'more news' button")
    parser.add_argument("--threading_num",
                        type=int,
                        default=3,
                        help="Number of threading")
    parser.add_argument('-o', "--output_dir", type=str, default="../output")
    parser.add_argument('-i',
                        "--ip_list_file",
                        type=str,
                        default="../input/alive_ip_list.txt")
    parser.add_argument('-v',
                        "--visited_url",
                        type=str,
                        default="../input/visited_url.txt")
    parser.add_argument('-u',
                        "--unvisited_url",
                        type=str,
                        default="../input/unvisited_url.txt")
    parser.add_argument('-r',
                        "--root_url",
                        type=str,
                        default="../input/root_url.txt")
    args = parser.parse_args()

    datahelper = DataHelper()
    spider = Spider(datahelper=datahelper, args=args)
    spider.start()
Example #17
def _train_network(net, eval_net):
    global params
    global x
    global y

    iters = tf.Variable(1, trainable=False)
    learning_rate = None
    if params['decay_steps']:
        learning_rate = tf.train.exponential_decay(
            params['start_learning_rate'], iters, params['decay_steps'],
            params['decay_base'])
    else:
        learning_rate = tf.Variable(params['start_learning_rate'],
                                    trainable=False)
    with tf.name_scope('loss'):
        #loss_weights =  1.003 - tf.reduce_max(y, axis=1)

        kl = lambda p, q: tf.losses.softmax_cross_entropy(
            p, q, reduction=tf.losses.Reduction.MEAN)
        hs_kl = lambda p, q: tf.multiply(0.5, tf.square(kl(p, q)))

        loss = tf.losses.softmax_cross_entropy(
            y, net, weights=1.0, reduction=tf.losses.Reduction.MEAN)
        #loss = tf.nn.softmax_cross_entropy_with_logits(logits=net,
        #                                               labels=y,
        #                                               weights=loss_weights,
        #                                               reduction=tf.losses.Reduction.MEAN)
        #optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=params['momentum'])
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

        grads = optimizer.compute_gradients(loss)
        with tf.name_scope('clipping'):
            grads = [(tf.clip_by_value(grad, -1.5, 1.5), var)
                     for grad, var in grads]
        update = optimizer.apply_gradients(grads, global_step=iters)

    # with tf.name_scope('grads'):
    #     for grad, var in grads:
    #         tf.summary.histogram(f"{var.name.split(':')[0]}", grad)

    # with tf.name_scope('weights'):
    #     for grad, var in grads:
    #         tf.summary.histogram(f"{var.name.split(':')[0]}", var)

    learning_rate_reduce = params['learning_rate_reduce']

    # this should have a more general implementation, we chose 0 because
    # accuracy will grow as it improves
    top_result = 0.0
    dh = DataHelper(batch_size=params['batch_size'],
                    train_size=params['train_size'],
                    label_noise=params['label_noise'],
                    bands=params['bands'],
                    transform_func=eval(params['trans_func'])
                    if params['trans_func'] else None)

    with tf.name_scope('metrics'):
        evaluate.evaluate_tensorboard(eval_net, y)
    summaries = tf.summary.merge_all()

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        if params['restore']:
            saver.restore(sess,
                          tf.train.latest_checkpoint(params['model_dir']))
        else:
            sess.run(init)

        trainWriter = tf.summary.FileWriter(params['tf_train_dir'],
                                            graph=sess.graph)
        testWriter = tf.summary.FileWriter(params['tf_test_dir'],
                                           graph=sess.graph)
        # run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        # run_metadata = tf.RunMetadata()
        run_options = None
        run_metadata = None

        top_result = 0
        while iters.eval() < params['iter_limit']:
            current_iter = iters.eval()

            if learning_rate_reduce and current_iter in learning_rate_reduce:
                sess.run(learning_rate.assign(learning_rate.eval() / 10))

            if params['print']:
                tf.logging.info(f"Training iter:{current_iter}")

            batch_xs, batch_ys = dh.get_next_batch(iter_based=True)
            batch = {x: batch_xs, y: batch_ys}
            sess.run(update, feed_dict=batch)

            if current_iter % 10 == 0:
                if params['print']:
                    tf.logging.info("Evaluating")
                s = sess.run(summaries, feed_dict=batch)
                trainWriter.add_summary(s, current_iter)

            if current_iter % 100 == 0:
                if params['print']:
                    tf.logging.info('Testing')

                batch_xs, batch_ys = dh.get_next_batch(force_test=True)
                batch[x] = batch_xs
                batch[y] = batch_ys
                s = sess.run(summaries, feed_dict=batch)
                testWriter.add_summary(s, current_iter)

                evals = evaluate.evaluate(sess, eval_net, x, y, batch_xs,
                                          batch_ys, params['test_progress'])

                if params['save_progress'] and evals[0] > top_result:
                    if params['print']:
                        tf.logging.info('Saving checkpoint')
                    model_path = os.path.join(params['model_dir'],
                                              'res-net.ckpt')
                    saver.save(sess, model_path, global_step=iters)
                    top_result = evals[0]

    # This needs to be printed so that the async trainer can see the result
    if params['rtrn_eval']:
        print(top_result)
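The function above reads all of its configuration from the module-level params dict; a hedged sketch of the keys it actually consults (every value here is an illustrative placeholder, not a setting from the original project):

# Illustrative only: each key below is read somewhere in _train_network.
params = {
    'start_learning_rate': 1e-3,
    'decay_steps': 10000,            # falsy -> fixed learning rate instead of exponential decay
    'decay_base': 0.96,
    'learning_rate_reduce': [50000], # iterations at which the learning rate is divided by 10
    'batch_size': 64,
    'train_size': 0.8,
    'label_noise': 0.0,
    'bands': 4,
    'trans_func': None,              # string that eval()s to a transform function, or None
    'restore': False,                # restore the latest checkpoint from model_dir
    'model_dir': '../models',
    'tf_train_dir': '../report/tf-log/train',
    'tf_test_dir': '../report/tf-log/test',
    'iter_limit': 100000,
    'print': True,
    'test_progress': False,
    'save_progress': True,
    'rtrn_eval': True,               # print the best accuracy for an async caller
}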
Example #18
        'name': 'mask',
        'value': Mask.RANDOM
    },
    # {'name': 'learning_rate', 'value': 0.01},
    # (parameters for NECTR)
    # {'name': 'nectr_n_hidden_layers', 'min': 1, 'max': 2, 'step': 1},
    # {'name': 'nectr_n_neurons', 'min': 5, 'max': 45, 'step': 10}]
    # {'name': 'nectr_poisson', 'value': True},
    # {'name': 'nectr_item_counts', 'value': True},
    # {'name': 'nectr_train_tf_on_solutions', 'value': False},
    # {'name': 'nectr_learning_rate', 'value': 0.1},
    # {'name': 'nectr_nn_regularization_type', 'value': 'l1'},
    # {'name': 'nectr_nn_regularization', 'type': 'exp', 'min': 1e-2, 'max': 1e-2},
    # {'name': 'nectr_lambda_completion', 'type': 'exp', 'min': 2e-2, 'max': 2e-0},
    {
        'name': 'nectr_n_epoch_completion',
        'min': 2,
        'max': 2,
        'step': 2
    }
]

# Setup DataHelper utils
DataHelper.setup(PATH_TO_DATA)

# Setup cross validation
cv = CrossValidation(datahelper=DataHelper, parameters=parameters)

# Run the cross validation pipeline
cv.run_pipeline(model, PATH_TO_RESULTS, plot=False)
Example #19
def _train_network(net):
    global params
    global x
    global y

    iters = tf.Variable(0, trainable=False)
    learning_rate = None
    if params['decay_steps']:
        learning_rate = tf.train.exponential_decay(
            params['start_learning_rate'], iters, params['decay_steps'],
            params['decay_base'])
    else:
        learning_rate = tf.Variable(params['start_learning_rate'])

    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(net, y))

    # find a way to parameterize the optimizer
    optimizer = tf.train.MomentumOptimizer(learning_rate, params['momentum'],
                                           params['nesterov'])
    optimize = optimizer.minimize(cost, global_step=iters)

    init = tf.initialize_all_variables()
    saver = tf.train.Saver()

    learning_rate_reduce = params['learning_rate_reduce']

    start = time.time()
    # this should have a more general implementation, we chose 0 because
    # accuracy will grow as it improves
    top_result = 0.0
    with tf.Session() as sess:
        sess.run(init)

        for epoch in range(1, params['epoch_limit'] + 1):
            if params['print']:
                print epoch

            dh = DataHelper(batch_size=params['batch_size'],
                            train_size=params['train_size'],
                            label_noise=params['label_noise'],
                            bands=params['bands'],
                            transform_func=eval(params['trans_func'])
                            if params['trans_func'] else None)

            if learning_rate_reduce and epoch in learning_rate_reduce:
                sess.run(learning_rate.assign(learning_rate.eval() / 10.0))

            while dh.training:
                batch_xs, batch_ys = dh.get_next_batch()
                sess.run(optimize, feed_dict={x: batch_xs, y: batch_ys})

                if iters.eval() % 20 == 0:
                    evaluate.evaluate(sess, net, x, y, batch_xs, batch_ys,
                                      params['train_progress'])

            #testing
            batch_xs, batch_ys = dh.get_next_batch()
            results = evaluate.evaluate(sess, net, x, y, batch_xs, batch_ys,
                                        params['test_progress'])

            if params['save_progress'] and results[0] > top_result:
                if params['print']:
                    print 'Saving checkpoint'
                saver.save(sess, params['model_dir'], global_step=iters)
                top_result = results[0]

    if params['print']:
        print 'Epoch took {} seconds'.format(time.time() - start)

    if params['rtrn_eval']:
        print top_result
Example #20
def p_drop(a, p):
    a[a<np.percentile(a, p)] *= 1e-3
    return a


scaler = MinMaxScaler()
#scaler = MinMaxScaler(feature_range=(-1, 1))
valid_img = lambda a: a.sum()>0 and np.isfinite(a).sum()==np.prod(a.shape)
scale = lambda a: scaler.fit_transform(a.reshape(-1,1)).reshape(84,84).astype(np.float32)
mean_subtraction = lambda a: a-a.mean()
drop_percentile = lambda a, p: p_drop(a,p)
identity = lambda a: a
b_trans = lambda a: scale(drop_percentile(a, 50)) if valid_img(a) else a

dh = DataHelper(batch_size=batch_size, band_transform_func=b_trans)
epoch = 1
len_epoch = len(dh._train_imgs)
print(f'Epoch length = {len_epoch}')

summaries = tf.summary.merge_all()
saver = tf.train.Saver()

with tf.Session() as sess:
    if restore_file:
        saver.restore(sess, restore_file)
    else:
        sess.run(init)

    trainWriter = tf.summary.FileWriter('../report/tf-log/train', graph=sess.graph)
    testWriter = tf.summary.FileWriter('../report/tf-log/test', graph=sess.graph)
Example #21
 def _keys(self, fn):
     helper = DataHelper(fn)
     if not os.path.isfile('../data/middle/entity2id.txt'):
         helper.id2file()
     return list(helper.node2id.keys())
Example #22
import tensorflow as tf
import numpy as np
import sys

from datahelper import DataHelper

VOCAB_SIZE = 10000
EMBEDDING_SIZE = 1
LEARNING_RATE = 1e-3
MINI_BATCH_SIZE = 256
NORMALIZE_LAYER = 0

data_helper = DataHelper(_voc_size=VOCAB_SIZE)

data_helper.load_train_ins_and_process("data/train.50_51.ins")
data_helper.load_eval_ins("data/eval.52.ins")

print "data loaded"


def eval_auc(eval_res, eval_label):
    sorted_res = np.argsort(eval_res, axis=0)

    m = 0
    n = 0
    rank = 0

    for k in range(sorted_res.shape[0]):
        idx = sorted_res[k][0]
        if eval_label[idx][0] == 1:
            m += 1
Example #23
import tensorflow as tf
import numpy as np
import sys

from datahelper import DataHelper

VOCAB_SIZE = 10000
EMBEDDING_SIZE = 1
LEARNING_RATE = 1e-3
MINI_BATCH_SIZE = 256
NORMALIZE_LAYER = 0

data_helper = DataHelper(_voc_size=VOCAB_SIZE)

data_helper.load_train_ins_and_process("data/train.50_51.ins")
data_helper.load_eval_ins("data/eval.52.ins")

print "data loaded"


def eval_auc(eval_res, eval_label):
    sorted_res = np.argsort(eval_res, axis=0)

    m = 0
    n = 0
    rank = 0

    for k in range(sorted_res.shape[0]):
        idx = sorted_res[k][0]
        if eval_label[idx][0] == 1:
            m += 1
Example #24
class GraphMes:
    def __init__(self, graph=None, file=None, start=0, ints=False):
        if graph is None and file is not None:
            self.helper = DataHelper(file,NP=False)
            self.samples = self.helper.GetSamples()
            if ints:
                samples = []
                for i in self.samples:
                    samples.append([int(i[0]), int(i[1]), int(i[2])])
                self.samples = np.array(samples)
            else:
                self.samples = np.array(self.samples)
            self.G = self.readGraph(file, ints=ints)
            self.uG = self.readGraph(file, ints=ints, unweight = True)
        
        elif graph is not None and file is None:
            self.G = graph
            self.uG = nx.Graph(self.G)

            self.samples = []
            for edge in self.G.edges():
                for i in self.G[edge[0]][edge[1]]:
                    self.samples.append([edge[0], self.G[edge[0]][edge[1]][i]['attr'], edge[1]])

        else:
            raise Exception
        
        self.start = start
        self.node2id, self.id2node = self._node2id()
        self.edge2id, self.id2edge = self._edge2id()

    def readGraph(self, sf, ints=False, unweight=False):
        self.SamplesCnt = len(self.samples)
        if unweight:
            G = nx.Graph()
            for sample in self.samples:
                G.add_edge(sample[0], sample[2])
        else:
            G = nx.MultiDiGraph()
            for sample in self.samples:
                G.add_edge(sample[0], sample[2], attr=sample[1])
        return G

    def graph2id(self, of):
        with open(of, 'w') as f:
            for h, r, t in self.samples:
                f.write(str(self.node2id[h]) + ' ' + str(self.edge2id[r]) +
                        ' ' + str(self.node2id[t]) + '\n')

    def _node2id(self):
        node2id = dict()
        id2node = dict()
        index = 0
        for node in self.G.nodes():
            node2id.update({node:self.start+index})
            id2node.update({self.start+index:node})
            index += 1
        return node2id, id2node

    def _edge2id(self):
        edge2id = dict()
        id2edge = dict()
        self.attrs = set()
        for edge in self.G.edges():
            for i in self.G[edge[0]][edge[1]]:
                # print(self.G[edge[0]][edge[1]][i]['attr'])
                self.attrs.add(self.G[edge[0]][edge[1]][i]['attr'])
        index = 0
        for attr in self.attrs:
            edge2id.update({attr:self.start+index})
            id2edge.update({self.start+index:attr})
            index += 1
        return edge2id, id2edge
    
    def id2file(self, nodefn, edgefn):
        with open(nodefn, 'w') as nf:
            for i in range(len(self.node2id)):
                nf.write(self.id2node[i]+' '+str(i)+'\n')
        with open(edgefn, 'w') as ef:
            for i in range(len(self.edge2id)):
                ef.write(self.id2edge[i]+' '+str(i)+'\n')

    def _update_margin(self, searched, margin):
        margin_backup = copy.copy(margin)
        for i in margin_backup:
            for j in self.G.neighbors(i):
                if j not in searched:
                    margin.add(j)
        for i in margin_backup:
            margin.remove(i)
            searched.add(i)
            
        if len(margin) == 0:
            random_sampling = np.random.randint(0, len(self.nodes)-1)
            while( random_sampling not in searched and len(margin)==0):
                margin.add(random_sampling)
                random_sampling = np.random.randint(0, len(self.nodes)-1)
        
    
    def cohesive(self, windowSize):
        all_bs = nx.eigenvector_centrality(self.uG)

        searched = set()
        margin = set()
        windows = set()
        center = np.random.randint(0,len(self.nodes)-1)
        windows.add(center)
        margin.add(center)
        searched.add(center)

        while(len(windows) < windowSize):
            margin_bs = {}
            self._update_margin(searched, margin)
            for i in margin:
                margin_bs.update({i:all_bs[i]})
            margin_bs_sort = texthelper.sortDict(margin_bs, By="value", reverse=True)
            for j in margin_bs_sort:
                windows.add(j[0])
                if len(windows) >= windowSize:
                    break
        return windows
        
    @property
    def nodes(self):
        return list(self.G.nodes)
    @property
    def nodeCnt(self):
        return len(self.G.nodes)
    @property
    def samplesCnt(self):
        return len(self.samples)
    @property
    def edges(self):
        return list(self.attrs)
    @property
    def edgeCnt(self):
        return len(self.attrs)
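A hedged sketch of the graph-based constructor and the cohesive() window sampling above; the toy graph is invented for illustration, and the calls assume the module's own numpy, networkx and texthelper imports are available:

# Illustrative only: a tiny labelled multigraph with integer node ids.
import networkx as nx

G = nx.MultiDiGraph()
G.add_edge(0, 1, attr='r1')
G.add_edge(1, 2, attr='r2')
G.add_edge(2, 0, attr='r1')

gm = GraphMes(graph=G)
print(gm.nodeCnt, gm.edgeCnt, gm.samplesCnt)  # 3 nodes, 2 relation types, 3 triples
gm.graph2id('graph_ids.txt')                  # one "head_id rel_id tail_id" line per triple
window = gm.cohesive(2)                       # node window grown by eigenvector centrality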
Example #25
class Asteria():
    '''
    Functions:
    1. calculate the similarity between functions
    2.  calculate the similarity between asts
    3.  calculate the similarity between ast encodings
    '''
    def __init__(self, checkpoint_path, model_selector, cuda=False):
        #cuda = True
        self.dh = DataHelper()  # database reader
        self.checkpoint_path = checkpoint_path
        self.model_selector = model_selector
        self.cuda = cuda
        l.info("[I] Model Loading....")
        self.compute_app = Application(checkpoint_path,
                                       cuda=cuda,
                                       model_name=model_selector)
        l.info("[I] Model loaded...")

    def ast_encode_similarity(self, sources=[], targets=[], threshold=0):
        '''
        :param sources:list: source asts
        :param targets:list: target asts
        :return: dict: key is function_name, value is a dict :{'rank':[], 'info':(function_name, elf_path, elf_file_name, caller, callee, ast_encode)}
        '''
        result = defaultdict(dict)
        for (function_name, elf_path, elf_name, scaller, scallee,
             ast_encode), _ in tqdm(sources):
            res = []
            pool = Pool(processes=cpu_count() - 2)
            for (tfunction_name, telf_path, telf_name, tcaller, tcallee,
                 tast_encode), _ in targets:
                if tast_encode is None:
                    print("%s encode not exits" % tfunction_name)
                res.append((pool.apply_async(
                    self.compute_app.similarity_treeencoding_with_correction,
                    (json.loads(ast_encode), json.loads(tast_encode),
                     (scaller, scallee), (tcaller, tcallee))), tfunction_name,
                            telf_path, telf_name))
            pool.close()
            pool.join()
            similarity_list = []
            for r in res:
                sim = r[0].get()
                if sim >= threshold:
                    similarity_list.append(((r[1], r[2]), sim))
            similarity_list.sort(key=lambda x: x[1], reverse=True)  # sort by similarity, descending
            result[function_name]['rank'] = similarity_list
            result[function_name]['info'] = (function_name, elf_path,
                                             telf_name)
        return result

    def prefilter(self, ast1, ast2):
        '''
        :param ast1:
        :param ast2:
        :return: if ast1 and ast2 are too different , return 1.
        '''
        c1 = ast1.num_children
        c2 = ast2.num_children
        if abs(c1 - c2) > 30:
            return 1

        if c1 / c2 > 3 or c2 / c1 > 3:
            return 1
        return 0

    def ast_similarity(self,
                       sources=[],
                       targets=[],
                       astfilter=None,
                       threshold=0):
        '''
        :param sources: list: source asts
        :param targets: list: target asts
        func_info:[function_name, elf_path, elf_file_name, caller, callee, ast_encode]
        :param astfilter: a filter function to filter out ast pairs which are too different.
        :return: dict: key
        {'rank':[], 'info':(function_name, elf_path, elf_file_name, caller, callee, ast_encode)}
        '''
        result = {}
        N = len(sources)
        i = 0
        TN = len(targets)
        astfilter = self.prefilter

        if astfilter:
            l.error("Filter Function is applied.")
        for s_func_info, s_ast in sources:
            i += 1
            result[s_func_info[0]] = {'rank': '', 'info': ''}
            res = []
            with tqdm(targets, desc="[%d] of %d" % (i, N),
                      dynamic_ncols=True) as t:
                for func_info, t_ast in t:
                    if astfilter and astfilter(s_ast, t_ast):
                        res.append([func_info, 0])
                    else:
                        res.append([
                            func_info,
                            self.compute_app.similarity_tree_with_correction(
                                s_ast, t_ast,
                                [s_func_info[-3], s_func_info[-2]],
                                [func_info[-3], func_info[-2]])
                        ])

            res = list(filter(lambda x: x[1] > threshold, res))
            res.sort(key=lambda x: x[1], reverse=True)  # sort by similarity, descending

            result[s_func_info[0]]['rank'] = res
            result[s_func_info[0]]['info'] = s_func_info
        return result

    def db_similarity(self,
                      source_db,
                      target_db,
                      ast,
                      threshold,
                      start=-1,
                      end=-1):
        '''
        :param source_db: ought to be the vulnerability database path
        :param target_db: firmware function database
        :param ast: True: compute similarity directly on the ASTs; False: compute similarity on the encoded AST vectors
        :param threshold: float: 0~1
        :param start/end: the position for select in sql limit
        :return:
        '''
        source_asts = []
        target_asts = []
        elf_names = set()
        where_suffix = " limit 0,20"  # the number of vulnerability functions does not exceeds 100
        for func in list(
                self.dh.get_functions(source_db, where_suffix=where_suffix)):
            # limit vul function number
            source_asts.append(func)
            elf_names.add("'" + func[0][2].split('.')[0] + "%'")
        elf_files = " or ".join(elf_names)
        # where_suffix = " where elf_file_name like %s" % elf_files
        #l.info("[DB] the firmware select filter is %s" % where_suffix)
        where_suffix = ""
        for func in self.dh.get_functions(target_db,
                                          start=start,
                                          end=end,
                                          where_suffix=where_suffix):
            target_asts.append(func)

        if ast:
            return self.ast_similarity(source_asts,
                                       target_asts,
                                       threshold=threshold)
        else:
            return self.ast_encode_similarity(source_asts,
                                              target_asts,
                                              threshold=threshold)
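A hedged sketch of how the class above might be driven end to end; the checkpoint path, model selector and database paths are placeholders, and the threshold is arbitrary:

# Illustrative only: rank firmware functions by similarity to known vulnerable functions.
asteria = Asteria('checkpoints/model.pt', model_selector='treelstm', cuda=False)
result = asteria.db_similarity('vul_functions.sqlite', 'firmware_functions.sqlite',
                               ast=True, threshold=0.8)
for func_name, entry in result.items():
    print(func_name, entry['rank'][:5])  # top-5 most similar target functions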
Example #26
 def __init__(self, checkpoint_path, model_name):
     self.chkpoint = checkpoint_path
     self.app = Application(load_path=checkpoint_path,
                            model_name=model_name)
     self.datahelper = DataHelper()
Example #27
import tensorflow as tf
import numpy as np
import sys

from datahelper import DataHelper

VOCAB_SIZE=10000
EMBEDDING_SIZE=1
LEARNING_RATE=1e-3
MINI_BATCH_SIZE=256
NORMALIZE_LAYER=0

data_helper = DataHelper(_voc_size = VOCAB_SIZE)

data_helper.load_train_ins_and_process("data/train.50_51.ins")
data_helper.load_eval_ins("data/eval.52.ins")

print "data loaded"

def eval_auc(eval_res, eval_label):
    sorted_res = np.argsort(eval_res, axis=0)

    m = 0
    n = 0
    rank = 0

    for k in range(sorted_res.shape[0]):
        idx = sorted_res[k][0]
        if eval_label[idx][0] == 1:
            m += 1
            rank += k + 1
Example #28
class GraphMes:
    def __init__(self, logging, graph=None, file=None, start=0, ints=False):
        self.logging = logging
        if not os.path.isdir(logging): os.mkdir(logging)

        if graph is None and file is not None:
            self.helper = DataHelper(file, NP=False)
            self.samples = self.helper.GetSamples()
            if ints:
                samples = []
                for i in self.samples:
                    samples.append([int(i[0]), int(i[1]), int(i[2])])
                self.samples = np.array(samples)
            else:
                self.samples = np.array(self.samples)
            self.G = self.readGraph(file, ints=ints)
            self.uG = self.readGraph(file, ints=ints, unweight=True)

        elif graph is not None and file is None:
            self.G = graph
            self.uG = nx.Graph(self.G)

            self.samples = []
            for edge in self.G.edges():
                for i in self.G[edge[0]][edge[1]]:
                    self.samples.append([
                        edge[0], self.G[edge[0]][edge[1]][i]['attr'], edge[1]
                    ])

        else:
            raise Exception

        self.start = start
        self.node2id, self.id2node = self._node2id()
        self.edge2id, self.id2edge = self._edge2id()

    def readGraph(self, sf, ints=False, unweight=False):
        self.SamplesCnt = len(self.samples)
        if unweight == True:
            G = nx.Graph()
            for sample in self.samples:
                G.add_edge(sample[0], sample[2])
        else:
            G = nx.MultiDiGraph()
            for sample in self.samples:
                G.add_edge(sample[0], sample[2], attr=sample[1])
        return G

    def graph2id(self, of):
        with open(of, 'w') as f:
            for h, r, t in self.samples:
                f.write(
                    str(self.node2id[h]) + ' ' + str(self.edge2id[r]) + ' ' +
                    str(self.node2id[t]) + '\n')

    def _node2id(self):
        node2id = dict()
        id2node = dict()
        index = 0
        for node in self.G.nodes():
            node2id.update({node: self.start + index})
            id2node.update({self.start + index: node})
            index += 1
        return node2id, id2node

    def _edge2id(self):
        edge2id = dict()
        id2edge = dict()
        self.attrs = set()
        for edge in self.G.edges():
            for i in self.G[edge[0]][edge[1]]:
                # print(self.G[edge[0]][edge[1]][i]['attr'])
                self.attrs.add(self.G[edge[0]][edge[1]][i]['attr'])
        index = 0
        for attr in self.attrs:
            edge2id.update({attr: self.start + index})
            id2edge.update({self.start + index: attr})
            index += 1
        return edge2id, id2edge

    def id2file(self, nodefn, edgefn):
        with open(nodefn, 'w') as nf:
            for i in range(len(self.node2id)):
                nf.write(self.id2node[i] + ' ' + str(i) + '\n')
        with open(edgefn, 'w') as ef:
            for i in range(len(self.edge2id)):
                ef.write(self.id2edge[i] + ' ' + str(i) + '\n')

    def zipf(self, plot=True):
        print('-------------')
        x, y = [], []
        degree = nx.degree_histogram(self.G)
        for i in range(len(degree)):
            if degree[i] != 0:
                y.append(degree[i] / float(sum(degree)))
                x.append(i)

        xdata = np.array(x)
        ydata = np.array(y)
        fita, fitb = optimize.curve_fit(powerLaw, xdata, ydata)
        print(fita, fitb)
        if plot == False:
            return fita, fitb
        else:
            # x = np.linspace(xdata.min(),xdata.max(),50)
            # y = fita[1]*powerNp(x,-fita[0])
            plt.figure()
            plt.title("Degree distribution curve fitting\n")
            matplotlib.rc('xtick', labelsize=30)
            matplotlib.rc('ytick', labelsize=30)
            plt.text(max(xdata) * 0.4,
                     max(ydata) * 0.4,
                     'y=' + "{:.3f}".format(fita[1]) + '*x^-' +
                     "{:.3f}".format(fita[0]),
                     ha='center')
            plt.plot(xdata, ydata, '.')
            # plt.plot(xdata,ydata,label='data')
            plt.xlabel('k(rank order)')
            plt.ylabel('p(k)')
            plt.savefig(self.logging + '/zipf.png')
            plt.close(0)

            plt.figure()
            plt.title("Degree distribution curve fitting (log)\n")
            plt.text(max(xdata) * 0.4,
                     max(ydata) * 0.4,
                     'y=' + "{:.3f}".format(fita[1]) + '*x^-' +
                     "{:.3f}".format(fita[0]),
                     ha='center')
            plt.xlabel('k(rank order)')
            plt.ylabel('p(k)')
            plt.loglog(xdata, ydata, '.')
            # plt.loglog(xdata,ydata,'g',label='data')
            plt.savefig(self.logging + '/zipf_log.png')
            return fita, fitb

    def zipf_coeffi(self, plot=True):
        # print(nx.average_clustering(graphmes.uG))
        degree = {}
        zipf_coeffi = {}
        for i in self.uG.nodes():
            if self.uG.degree(i) in degree:
                degree[self.uG.degree(i)].append(i)
            else:
                degree.update({self.uG.degree(i): [i]})
        for i in degree:
            zipf_coeffi.update({i: 0})
            for node in degree[i]:
                zipf_coeffi[i] += nx.clustering(self.uG, node)
            zipf_coeffi[i] /= len(degree[i])

        zipf_coeffi = np.array(texthelper.sortDict(zipf_coeffi, By="key"))

        if plot == False:
            return zipf_coeffi
        else:
            xdata = zipf_coeffi[:, 0]
            ydata = zipf_coeffi[:, 1]
            fita, fitb = optimize.curve_fit(powerLaw, xdata, ydata)
            plt.figure()
            plt.title("Degree-Clustering distribution curve fitting\n")
            plt.text(max(xdata) * 0.4,
                     max(ydata) * 0.4,
                     'y=' + "{:.2f}".format(fita[1]) + '*x^-' +
                     "{:.2f}".format(fita[0]),
                     ha='center')
            plt.plot(xdata, ydata, '.')
            # plt.plot(xdata,ydata,'.', label='data')
            plt.xlabel('k')
            plt.ylabel('clustering')
            plt.savefig(self.logging + '/zipf_coeffi.png')
            plt.close(0)

            plt.figure()
            plt.text(max(xdata) * 0.4,
                     max(ydata) * 0.4,
                     'y=' + "{:.2f}".format(fita[1]) + '*x^-' +
                     "{:.2f}".format(fita[0]),
                     ha='center')
            plt.title("Degree-Clustering distribution curve fitting (log)\n")
            plt.loglog(xdata, ydata, '.')
            # plt.loglog(xdata,ydata,'.', label='data')
            plt.xlabel('log(k)')
            plt.ylabel('log(clustering)')
            plt.savefig(self.logging + '/zipf_coeffi_log.png')
            plt.close(0)
            return zipf_coeffi

    def record(self, additional=True):
        with open(self.logging + '/info.txt', 'w') as f:
            f.write(" Number of nodes :" + str(len(self.nodes)) + '\n')
            f.write(" Number of edges :" + str(len(self.edges)) + '\n')
            f.write(" Number of samples :" + str(self.samplesCnt) + '\n')
            if additional:
                uG = nx.Graph(self.G)
                connectedCnt = nx.number_connected_components(uG)
                f.write(" number_connected_components :" + str(connectedCnt) +
                        '\n')
                if connectedCnt == 1:
                    f.write(" Diameter :" + str(nx.diameter(uG)) + '\n')
                    f.write(" Radius :" + str(nx.radius(uG)) + '\n')
                    f.write(" average_shortest_path_length :" +
                            str(nx.average_shortest_path_length(uG)) + '\n')
                f.write(" Density :" + str(nx.density(uG)) + '\n')
                f.write(" average_clustering :" +
                        str(nx.average_clustering(uG)) + '\n')
                f.write(" node_connectivity :" +
                        str(nx.node_connectivity(self.G)) + '\n')
                f.write(" global_efficiency :" +
                        str(nx.global_efficiency(uG)) + '\n')

    @property
    def nodes(self):
        return list(self.G.nodes)

    @property
    def nodeCnt(self):
        return len(self.G.nodes)

    @property
    def samplesCnt(self):
        return len(self.samples)

    @property
    def edges(self):
        return list(self.attrs)

    @property
    def edgeCnt(self):
        return len(self.attrs)
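A hedged sketch of the file-based constructor for the class above; the logging directory and triples file are placeholders, DataHelper(file, NP=False) is assumed to yield (head, relation, tail) samples as the constructor expects, and zipf() additionally relies on the module's own scipy curve-fitting helper:

# Illustrative only: analyse a triples file, dump id mappings and basic statistics.
gm = GraphMes('log_dir', file='triples.txt', ints=False)
gm.id2file('log_dir/entity2id.txt', 'log_dir/relation2id.txt')
gm.record(additional=False)       # node/edge/sample counts written to log_dir/info.txt
fita, fitb = gm.zipf(plot=False)  # power-law fit of the degree distribution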
Example #29
if args.balanced_real_images:
    if args.split == 'train':
        image_prefix = "COCO_train2014_000000"
    else:
        image_prefix = "COCO_val2014_000000"
    image_postfix = ".jpg"

elif args.abstract_scene_images:
    if args.split == 'train':
        image_prefix = "abstract_v002_train2015_0000000"
    else:
        raise NotImplementedError()

    image_postfix = ".png"

helper = DataHelper(args.annot_file, args.ques_file)

# Write dataset to file
with open(args.output_file, "w") as output_file:
    for i in range(len(helper.dataset['annotations'])):

        img_id = helper.dataset['annotations'][i]['image_id']
        img_name = image_prefix + pad_with_zero(img_id, args) + image_postfix

        ques_id = helper.dataset['annotations'][i]['question_id']
        question = helper.qqa[ques_id]['question']

        # Convert to comma-separated token string
        question = ','.join(question.strip().split())

        answer = helper.dataset['annotations'][i]['multiple_choice_answer']
Example #30
print W_out.get_shape()
print b_out.get_shape()
print out.get_shape()

# No changes to old network.py beyond this. Will be updating this soon.

cost = tf.reduce_mean(tf.squared_difference(out, y_))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
#optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

rmse = tf.sqrt(tf.reduce_mean(tf.squared_difference(out, y_)))

# Initialize
init = tf.initialize_all_variables()

dh = DataHelper(batch_size, test_idx=test_start)
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)
    print sess.run(W_conv1), sess.run(b_conv1), sess.run(W_conv2), sess.run(
        b_conv2)
    test_data, test_labels = dh.get_test_data(test_size)
    epoch = 1
    train_start = time.time()
    while epoch <= epochs:
        epoch_start = time.time()
        print 'Training Epoch {}...'.format(epoch)
        # get data, test_idx = 19000 is ~83% train test split
        dh = DataHelper(batch_size, test_idx=test_start)
        # test data
Example #31
import tensorflow as tf
import numpy as np
import sys

from datahelper import DataHelper

VOCAB_SIZE=10000
EMBEDDING_SIZE=1
LEARNING_RATE=1e-3
MINI_BATCH_SIZE=256
NORMALIZE_LAYER=0

data_helper = DataHelper(_voc_size = VOCAB_SIZE)

data_helper.load_train_ins_and_process("data/train.50_51.ins")
data_helper.load_eval_ins("data/eval.52.ins")

print "data loaded"

def eval_auc(eval_res, eval_label):
    sorted_res = np.argsort(eval_res, axis=0)

    m = 0
    n = 0
    rank = 0

    for k in range(sorted_res.shape[0]):
        idx = sorted_res[k][0]
        if eval_label[idx][0] == 1:
            m += 1
            rank += k + 1
Example #32
 def setUp(self):
     """ Read the data file and create model with training data """
     data = DataHelper().read_data()
     self.x_train, self.x_test, self.y_train, self.y_test = \
         DataHelper().split_data(data)
     self.model = self.mh.create_model(self.x_train, self.y_train)