Example #1
    def generate(self):
        def writeTweetAndGetNextTweet(tweet, tweetType, iterator):
            #            print tweetType, trainingTime, testTime
            tweet["tweet_type"] = tweetType
            Utilities.writeAsJsonToFile(tweet, self.outputCombinedFile)
            return iterator.next()

        trainingFileIterator = Utilities.iterateTweetsFromFileWithTerminatingNone(self.inputTrainingSetFile)
        testFileIterator = Utilities.iterateTweetsFromFileWithTerminatingNone(self.inputTestSetFile)
        trainingTweet, testTweet = trainingFileIterator.next(), testFileIterator.next()
        trainingTime, testTime = None, None
        while trainingTweet != None or testTweet != None:
            if trainingTweet != None:
                trainingTime = datetime.strptime(trainingTweet["created_at"], Settings.twitter_api_time_format)
            if testTweet != None:
                testTime = datetime.strptime(testTweet["created_at"], Settings.twitter_api_time_format)
            if trainingTweet != None and testTweet != None:
                if testTime < trainingTime:
                    testTweet = writeTweetAndGetNextTweet(testTweet, TweetType.test, testFileIterator)
                else:
                    trainingTweet = writeTweetAndGetNextTweet(trainingTweet, TweetType.train, trainingFileIterator)
            elif trainingTweet == None:
                while testTweet != None:
                    testTweet = writeTweetAndGetNextTweet(testTweet, TweetType.test, testFileIterator)
            else:
                while trainingTweet != None:
                    trainingTweet = writeTweetAndGetNextTweet(trainingTweet, TweetType.train, trainingFileIterator)
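The generate() method above merges two tweet files that are each already sorted by created_at, tagging every record with its origin before writing it out. A minimal sketch of the same two-way merge using heapq.merge (Python 3.5+); the Utilities file helpers are left out, and the time-format string and the plain "train"/"test" tags are assumptions standing in for Settings.twitter_api_time_format and TweetType:

from datetime import datetime
from heapq import merge

TIME_FORMAT = "%a %b %d %H:%M:%S +0000 %Y"  # assumed Twitter created_at format

def tag(tweets, tweet_type):
    for tweet in tweets:
        tweet["tweet_type"] = tweet_type
        yield tweet

def merge_streams(training_tweets, test_tweets):
    # Both inputs must already be sorted by created_at; heapq.merge then yields
    # one combined, chronologically ordered stream, preferring the training
    # stream on ties just like the loop above.
    key = lambda tweet: datetime.strptime(tweet["created_at"], TIME_FORMAT)
    return merge(tag(training_tweets, "train"), tag(test_tweets, "test"), key=key)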
Example #2
	def sendRequest(self):
		try:
			self.params =  [param.items()[0] for param in self.params];

			params = urllib.urlencode(self.params);
		
			Utilities.debug("Opening connection to "+self.base_url);
			self.conn = httplib.HTTPConnection(self.base_url,80);
			headers = {"User-Agent":self.getUserAgent(),
				"Content-Type":"application/x-www-form-urlencoded",
				"Accept":"text/json"
				};
		
			#Utilities.debug(headers);
			#Utilities.debug(params);
		
			self.conn.request("GET",self.req_file,params,headers);
			resp=self.conn.getresponse()
			response = json.loads(resp.read());
			#Utilities.debug(response);

			self.done.emit(response);
			return response;
		except:
			self.fail.emit()
Example #3
 def __init__(self, currentTime, dataType, numberOfExperts):
     self.currentTime = currentTime
     self.numberOfExperts = numberOfExperts
     self.inputTrainingSetFile = Utilities.getTrainingFile(currentTime, dataType, self.numberOfExperts)
     self.inputTestSetFile = Utilities.getTestFile(currentTime, dataType, self.numberOfExperts, bottom=True)
     self.outputCombinedFile = Utilities.getStreamingSetsFile(currentTime, dataType, numberOfExperts)
     Utilities.createDirectory(self.outputCombinedFile)
Example #4
    def request(self, method, url, data=[]):
        if Iugu.getApiKey() is None:
            Utilities.authFromEnv()

        if Iugu.getApiKey() is None:
            raise IuguAuthenticationException("Chave de API não configurada. Utilize Iugu.setApiKey(...) para configurar.")

        headers = self.__defaultHeaders()
        ( response_body, response_code ) = self._requestWithCURL( method, url, headers, data )
        try:
            response = json.loads(response_body)
        except ValueError:
            raise IuguObjectNotFound(response_body)

        if response_code == 404:
            raise IuguObjectNotFound(response_body)

        # json.loads returns a dict, so errors must be read with key access and
        # checked with isinstance. An empty error list is normalized to None;
        # string errors pass through unchanged.
        errors = response.get("errors")
        if errors is not None and not isinstance(errors, str) and len(errors) == 0:
            response["errors"] = None
        iugu_last_api_response_code = response_code
        return response
Example #5
File: book.py Project: koe-/ebola
	def to_xml(self, library=None):
		d = self.to_dict()
		
		res = "<book>\n"
		for x in d:
			if x == "authors":
				if len(d["authors"]) > 0:
					res = res + "  <authors>\n"+ \
								"    <name>" + ("</name>\n    <name>".join([Utilities.escape_xml(y) for y in d["authors"]])) + "</name>\n"+ \
								"  </authors>\n"
			elif x == "categories" and library != None:
				if len(d["categories"]) > 0:
					res = res + "  <categories>\n"
					col = library.categories.collection
					for i in d["categories"]:
						res = res + \
								"    <item>\n" + \
								"      " + "".join(["<n color='" + col[y].color + "'>" + Utilities.escape_xml(col[y].name) + "</n>" for y in library.categories.get_full_category_ids(i)]) + "\n" +\
								"    </item>\n"
					res = res + "  </categories>\n"
			elif x == "isbn10":
				res = res + "  <isbn10>" + ISBN.to_string(d["isbn10"], set_hyphen=False) + "</isbn10>\n"
			elif x == "isbn13":
				res = res + "  <isbn13>" + ISBN.to_string(d["isbn13"], set_hyphen=False) + "</isbn13>\n"
			elif d[x] != None:
				res = res + "  <" + x + ">" + Utilities.escape_xml(str(d[x])) + "</" + x + ">\n"
		return res + "</book>\n"
Example #6
    def close(self):
    	"""
    	None -> None

    	Deletes residual files from the simulation
    	"""
    	Utilities.clean_retrosheet_files()
Example #7
	def __init__(self,inputstream,dictionary):
		Utilities.debug('Reader init');
		self.tokenMap = dictionary;
		self.rawIn = inputstream;
		self.inn = ByteArray();
		self.buf = bytearray(1024);
		self.bufSize = 0;
		self.readSize = 1;
Example #8
    def __init__(self, conn, reader, writer, digest):
        super(WALogin, self).__init__()

        self.conn = conn
        self.out = writer
        self.inn = reader
        self.digest = digest

        Utilities.debug("WALOGIN INIT")
Example #9
 def generateStatsForGlobalClassifier():
     classifier = GlobalClassifier()
     classifier.load()
     currentDay = Settings.startTime
     while currentDay<=Settings.endTime:
         data = {'day': datetime.strftime(currentDay, Settings.twitter_api_time_format),  'metric': 'aucm', 'data_type': DocumentType.typeRuuslUnigram, 'test_data_days': 1}
         data['value'] = classifier.getAUCM(TestDocuments(currentTime=currentDay, numberOfExperts=Settings.numberOfExperts, dataType=DocumentType.typeRuuslUnigram, noOfDays=1).iterator())
         Utilities.writeAsJsonToFile(data, Settings.stats_for_global_classifier)
         currentDay+=timedelta(days=1)
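This function, and several of the other stats generators in this collection, repeat the same day-by-day loop from Settings.startTime to Settings.endTime. The pattern boils down to a small generator like the following sketch:

from datetime import timedelta

def iterate_days(start, end):
    # Yield every day from start to end inclusive, mirroring the while-loops
    # used by the stats generators in these examples.
    current = start
    while current <= end:
        yield current
        current += timedelta(days=1)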
Example #10
	def nextTree(self):
		stanzaSize = self.readInt16(self.rawIn,1);
		self.inn.buf = [];
		self.fillBuffer(stanzaSize);
		ret = self.nextTreeInternal();
		Utilities.debug("<<")
		if ret is not None:
			'''Utilities.debug(ret.toString());'''
		return ret;
Example #11
	def message_status_update(self,fmsg):
		Utilities.debug("Message status updated {0}".format(fmsg.status));
		contact = fmsg.getContact();
		modelData = fmsg.getModelData();
		modelData["Contact"] = contact.getModelData();
		if fmsg.status == WAXMPP.message_store.store.Message.STATUS_SENT:
			self.messageSent.emit(modelData);
		elif fmsg.status == WAXMPP.message_store.store.Message.STATUS_DELIVERED:
			self.messageDelivered.emit(modelData); 
Example #12
 def __init__(self, currentTime, numberOfExperts):
     super(DocumentTypeRuuslUnigramWithMeta, self).__init__(
         currentTime, DocumentType.typeRuuslUnigramWithMeta, numberOfExperts
     )
     self.inputTrainingSetFile = Utilities.getTrainingFile(
         currentTime, DocumentType.typeRuuslUnigram, self.numberOfExperts
     )
     self.inputTestSetFile = Utilities.getTestFile(
         currentTime, DocumentType.typeRuuslUnigram, self.numberOfExperts, bottom=True
     )
Example #13
 def generateStatsForTrainingDataPerDay():
     currentDay = Settings.startTime
     noOfDays = 1
     while currentDay<=Settings.endTime:
         classDistribution = defaultdict(int)
         for d in Utilities.getTweets(fileNameMethod=Utilities.getTrainingFile, dataDirection=DataDirection.past, currentTime=currentDay, numberOfExperts=Settings.numberOfExperts, dataType=DocumentType.typeRuuslUnigram, noOfDays=noOfDays):
             classDistribution[d[1]]+=1
         data = {'day': datetime.strftime(currentDay, Settings.twitter_api_time_format), 'class_distribution': classDistribution}
         Utilities.writeAsJsonToFile(data, Settings.stats_for_training_data)
         currentDay+=timedelta(days=1)
Example #14
	def write(self, node,needsFlush = 0):
		if node is None:
			self.out.write(0);
		else:
			Utilities.debug(">>");
			'''Utilities.debug(node.toString());'''
			self.writeInternal(node);
		
		self.flushBuffer(needsFlush);
		self.out.buf = [];
Example #15
    def setup(self):
    	"""
    	None -> None

    	Downloads and parses necessary retrosheet data for the simulation
    	"""
        retro = Retrosheet(self.simYear)
        Utilities.ensure_gamelog_files_exist(self.simYear)
        Utilities.ensure_boxscore_files_exist(self.simYear, 'HOU')
        retro.clean_used_files()
Example #16
 def generateStatsForTopFeatures():
     global maxLength
     currentDay = Settings.startTime
     noOfDays = 1
     while currentDay<=Settings.endTime:
         classifier = FixedWindowClassifier(currentTime=currentDay, numberOfExperts=Settings.numberOfExperts, dataType=DocumentType.typeRuuslUnigram, noOfDays=noOfDays)
         classifier.load()
         data = {'day': datetime.strftime(currentDay, Settings.twitter_api_time_format), 'classifier_length': noOfDays, 'number_of_experts': Settings.numberOfExperts, 'data_type': DocumentType.typeRuuslUnigram}
         data['features']=classifier.showMostInformativeFeatures(2000)
         Utilities.writeAsJsonToFile(data, Settings.stats_for_most_informative_features)
         currentDay+=timedelta(days=1)
Example #17
    def __repr__(self):
        """
		Give a verbose representation for a word in the format <form>@<lemma><categories>, for example: [email protected]('pl',)"

		@rtype: str
		"""
        z = Utilities.unidecode(self.__form) + "@" + ` self.__lemma `
        if len(self.categories) == 0:
            return z
        else:
            return z + Utilities.tuple_str(self.categories)
Example #18
 def generateStatsToDetermineFixedWindowLength():
     global maxLength
     currentDay = Settings.startTime
     while currentDay<=Settings.endTime:
         for noOfDays in Utilities.getClassifierLengthsByDay(currentDay, maxLength): 
             classifier = FixedWindowClassifier(currentTime=currentDay, numberOfExperts=Settings.numberOfExperts, dataType=DocumentType.typeRuuslUnigram, noOfDays=noOfDays)
             classifier.load()
             data = {'day': datetime.strftime(currentDay, Settings.twitter_api_time_format), 'classifier_length': noOfDays, 'metric': 'aucm', 'number_of_experts': Settings.numberOfExperts, 'data_type': DocumentType.typeRuuslUnigram, 'test_data_days': 1}
             data['value'] = classifier.getAUCM(TestDocuments(currentTime=currentDay+timedelta(days=1), numberOfExperts=Settings.numberOfExperts, dataType=DocumentType.typeRuuslUnigram, noOfDays=1).iterator())
             Utilities.writeAsJsonToFile(data, Settings.stats_to_determine_fixed_window_length)
         currentDay+=timedelta(days=1)
Example #19
	def streamStart(self):
		stanzaSize = self.readInt16(self.rawIn,1);
		self.fillBuffer(stanzaSize);
		tag = self.inn.read();
		size = self.readListSize(tag);
		tag = self.inn.read();
		if tag != 1:
			Utilities.debug(tag);
			raise Exception("expecting STREAM_START in streamStart");
		attribCount = (size - 2 + size % 2) / 2;
		attributes = self.readAttributes(attribCount);
Example #20
 def generateStatsObservePerformanceByRelabelingDocuments():
     global maxLength, idealModelLength
     currentDay = Settings.startTime
     while currentDay<=Settings.endTime:
         noOfDaysList = list(set([idealModelLength]).intersection(set(Utilities.getClassifierLengthsByDay(currentDay, maxLength))))
         for noOfDays in noOfDaysList: 
             classifier = FixedWindowWithRelabeledDocumentsClassifier(currentTime=currentDay, numberOfExperts=Settings.numberOfExperts, dataType=DocumentType.typeRuuslUnigram, noOfDays=noOfDays)
             classifier.load()
             data = {'day': datetime.strftime(currentDay, Settings.twitter_api_time_format), 'classifier_length': noOfDays, 'metric': 'aucm', 'number_of_experts': Settings.numberOfExperts, 'data_type': DocumentType.typeRuuslUnigram, 'test_data_days': 1}
             data['value'] = classifier.getAUCM(TestDocuments(currentTime=currentDay+timedelta(days=1), numberOfExperts=Settings.numberOfExperts, dataType=DocumentType.typeRuuslUnigram, noOfDays=1).iterator())
             Utilities.writeAsJsonToFile(data, Settings.stats_to_observe_performance_by_relabeling_documents)
         currentDay+=timedelta(days=1)
Example #21
 def generateDataSetStats125():
     currentDay = Settings.startTime
     while currentDay<=Settings.endTime:
         data = {'day': datetime.strftime(currentDay, Settings.twitter_api_time_format), 'train_classes': defaultdict(int), 'test_classes': defaultdict(int)}
         inputTrainingSetFile = Utilities.getTrainingFile(currentDay, DocumentType.typeRuuslUnigram, Settings.numberOfExperts)
         inputTestSetFile = Utilities.getTestFile(currentDay, DocumentType.typeRuuslUnigram, Settings.numberOfExperts, bottom=True)
         for file, tweetType in [(inputTrainingSetFile, 'training'), (inputTestSetFile, 'test')]:
             for tweet in Utilities.iterateTweetsFromFile(file):
                 if tweetType=='training': data['train_classes'][tweet['class']]+=1
                 else: data['test_classes'][tweet['class']]+=1
         Utilities.writeAsJsonToFile(data, Settings.stats_for_dataset_125)
         currentDay+=timedelta(days=1)
Example #22
	def quit(self):
		Utilities.debug("got quit!!!")
		#self.connMonitor.exit()
		#self.conn.disconnect()
		
		'''del self.connMonitor
		del self.conn.inn
		del self.conn.out
		del self.conn.login
		del self.conn.stanzaReader'''
		#del self.conn
		self.doQuit.emit();
Example #23
	def checkConnection(self):
		try:
			if self.conn.state == 0:
				raise Exception("Not connected");
			elif self.conn.state == 2:
				self.conn.sendPing();
		except:
			print "Connection crashed, reason: %s"%sys.exc_info()[1]
			self.networkDisconnected()
			self.networkAvailable();
			
		Utilities.debug("CHECK PASSEDDDDDDDDDDDDDDD")
Example #24
    def __repr__(self):
        """
		Return a verbose string representation.
		@rtype: str
		"""
        form, entry_form, id = self._content[0:3]
        categories = self.categories()
        r = ["{"]
        r.append(Utilities.unidecode(form) + "@" + Utilities.unidecode(entry_form) + "." + str(id))
        if self.categories():
            r.append(categories)
        r.append("}")
        return "".join(r)
Example #25
 def generate(self):
     for inputFile, outputFile in [
         (self.inputTrainingSetFile, self.outputTrainingSetFile),
         (self.inputTestSetFile, self.outputTestSetFile),
     ]:
         for tweet in Utilities.iterateTweetsFromFile(inputFile):
             data = {}
             for k in DocumentType.keys:
                 data[k] = tweet[k]
             data["screen_name"] = tweet["screen_name"]
             data["user_id"] = tweet["user_id"]
             data["document"] = tweet["document"] + DocumentTypeRuuslUnigramWithMeta.getUrlMeta(data["text"])
             Utilities.writeAsJsonToFile(data, outputFile)
Example #26
 def generate(self):
     for inputFile, outputFile in [
         (self.inputTrainingSetFile, self.outputTrainingSetFile),
         (self.inputTestSetFile, self.outputTestSetFile),
     ]:
         for tweet in Utilities.iterateTweetsFromFile(inputFile):
             data = {}
             for k in DocumentType.keys:
                 data[k] = tweet[k]
             data["screen_name"] = tweet["user"]["screen_name"]
             data["user_id"] = tweet["user"]["id_str"]
             data["document"] = self.modifyDocument(data["text"])
             Utilities.writeAsJsonToFile(data, outputFile)
Example #27
 def generateDataForGlobalClassifier():
     inputDataFile = "/home/kykamath/projects/Classifiers/src/lda_svm/global_classifier/data/global_classifier"
     classToIntMap = {"sports": 1, "politics": 2, "entertainment": 3, "technology": 4}
     for line in open(inputDataFile):
         try:
             classType, term = line.strip().split()
             stringClassType = Utilities.getTopicForIndex(classType)
             if stringClassType in classToIntMap:
                 Utilities.writeAsJsonToFile(
                     {"class": stringClassType, "data": [term]}, Settings.globalClassifierData
                 )
         except:
             pass
Example #28
def get_tasks():
	u = request.form['url'].lower()
	
	url = Utilities.get_shortened_url(u)
	url_3 = Utilities.get_shortened_url(u,3)

	return_only_parent = False

	# If url is the same as the parent url, return everything just for the parent
	# Don't redundantly return results for both the parent and the url itself
	if url == url_3 or url+'/' == url_3:
			return_only_parent = True

	ds = DataStore()

	if not return_only_parent:

		all_urls = Utilities.modify_url(url)
		print all_urls

		# If the same url is also a parent url, return all results of the parent
		# and skip individual url results

		for url in all_urls:
			result = ds.fetch(url)
			if result == False:
				print " Tried for url " + url
			else:
				x = {"result":result}
				return jsonify(x)

	# If nothing got returned for our exact url and its modifications

	outer_url = "parent::" + Utilities.get_shortened_url(url,3)
	print outer_url
	
	result = ds.fetch_all_from_parent(outer_url)
	if result : 
		x = {"result":result}
		return jsonify(x)
	else:
		if outer_url[-1] == '/':
			result = ds.fetch_all_from_parent(outer_url[:-1])
		else:
			result = ds.fetch_all_from_parent(outer_url + '/')
		if result : 
			x = {"result":result}
			return jsonify(x)

	# If there is still nothing to show
	return 'No Response'
Example #29
    def run(self):
        """
        Custom runner for the OWD initiative.
        It takes as arguments the same parameters that the gaiatest command would need.
        For example:
            python ffox_test_runner_py --testvars=<testvars path> --address=localhost:2828 <tests path |\
            test suite path>
        """

        # Preprocess
        parser = BaseMarionetteOptions(usage='%prog [options] test_file_or_dir <test_file_or_dir> ...')
        structured.commandline.add_logging_group(parser)
        options, tests = parser.parse_args(self.args[1:])
        parser.verify_usage(options, tests)

        # Traverse the tbpl logs option list and create directories if required
        for f in options.log_tbpl:
            d = f[:f.rfind('/')]
            if not os.path.exists(d):
                os.makedirs(d)

        logger = structured.commandline.setup_logging(options.logger_name, options)
        options.logger = logger

        # Remove default stdout logger from mozilla logger
        to_delete = filter(lambda h: h.stream.name == '<stdout>', logger.handlers)
        for d in to_delete:
            logger.remove_handler(d)

        location = self.parse_toolkit_location(self.args)
        options.toolkit_location = location

        # Hit the runner
        Utilities.connect_device()
        self.runner = self.start_test_runner(self.runner_class, options, tests)

        # Show the results via console and prepare the details
        self.process_runner_results()
        self.edit_html_results()
        self.edit_test_details()
        self.display_results()
        if self.runner.testvars['graphics']['enabled']:
            total_results_count = [self.passed, self.unexpected_failures, self.automation_failures,
                                   self.expected_failures, self.unexpected_passed, self.skipped]
            self.graphics = Graphics(results_by_suite=self.results_by_suite, total_results_count=total_results_count,
                                     output_dir=self.runner.testvars['graphics']['graphics_dir'])
            self.graphics.generate_all_graphics()

        # Generate CSV results (if required)
        is_cert = self.runner.testvars['general']['is_cert_device']
        Utilities.generate_csv_reports(self, is_cert)
Example #30
def search_user(user: str, stars: int, full: bool, utilities: Utilities):
    start = time()
    rate_limit_start, _ = utilities.get_rate_limit()
    user = User(name=user, minimal_stars=stars, full_search=full, utilities=utilities)
    user.get_all_repositories_parallel()
    # s.get_all_repositories()

    rate_limit_end, rate_reset = utilities.get_rate_limit()
    user.printout()
    end = time()
    print("-" * 100)
    print("TIME: " + str(end - start))
    print("Rate limit remaining: {}\nRate limit will be reset in {} seconds.".format(rate_limit_end, rate_reset))
    print("API rate used for this user: {}".format(rate_limit_start - rate_limit_end))
Example #31
 def __init__(self):
     self.utilities = Utilities()
     self.aspect_classifier = AspectClassifier()
     self.random_states = [11, 22, 33, 44, 55]
Example #32
    def compress_image(oimg,
                       block_size=4,
                       step_size=2,
                       spatial_factor=2,
                       intensity_shrinkage=0.75,
                       max_x_offset=None,
                       max_y_offset=None,
                       err_func=ImageUtils.mse,
                       verbosity=0):
        if verbosity > 0:
            print(Utilities.whoami())
            argdict = locals().copy()
            for k in argdict.keys():
                val = argdict[k]
                if not Utilities.is_iterable(val):
                    print("  {0}: {1}".format(k, argdict[k]))
                else:
                    print("  {0} is iterable".format(k))
        if verbosity > 0:
            print("orig dims: {0}, {1}".format(oimg.shape[0], oimg.shape[1]))
        cimg = ImageUtils.trim_image(oimg,
                                     spatial_factor=spatial_factor,
                                     block_size=block_size,
                                     verbosity=verbosity)
        if verbosity > 0:
            print("trimmed dims: {0}, {1}".format(cimg.shape[0],
                                                  cimg.shape[1]))
        if max_x_offset is None:
            max_x_offset = cimg.shape[1] - block_size
        if max_y_offset is None:
            max_y_offset = cimg.shape[0] - block_size
        dimg = ImageUtils.spatial_shrink(cimg, spatial_factor=spatial_factor)
        print("dimg_wd = {0}, dimg_ht = {1}".format(dimg.shape[0],
                                                    dimg.shape[1]))
        FCode = namedtuple("FCode",
                           ["dx", "dy", "mean_add", "rx", "ry", "err"])
        codes = []
        for rx in range(0, cimg.shape[1], block_size):
            if verbosity > 0:
                print("rx={0}".format(rx), end='')
            for ry in range(0, cimg.shape[0], block_size):

                parts = Compressor.find_best_params(
                    cimg,
                    dimg,
                    rx,
                    ry,
                    block_size=block_size,
                    step_size=step_size,
                    spatial_factor=spatial_factor,
                    intensity_shrinkage=intensity_shrinkage,
                    max_x_offset=max_x_offset,
                    max_y_offset=max_y_offset,
                    err_func=err_func,
                    verbosity=verbosity)
                dx, dy, mean_add, x, y, err, tries = parts
                code = FCode(dx, dy, mean_add, x, y, err)
                codes.append(code)
            print("--")
        params = OrderedDict()
        params['img_ht'] = cimg.shape[0]
        params['img_wd'] = cimg.shape[1]
        params['block_size'] = block_size
        params['step_size'] = step_size
        params['spatial_factor'] = spatial_factor
        params['intensity_shrinkage'] = intensity_shrinkage
        params['codes'] = codes
        return params
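A small usage sketch for the compressor above, assuming compress_image is exposed on the Compressor class (as the internal call to Compressor.find_best_params suggests) and that a grayscale image is available as a 2-D numpy array; ImageUtils and Compressor themselves are not reproduced here:

import numpy as np
# from fractal_compressor import Compressor  # assumed module name

img = np.random.randint(0, 256, size=(64, 64)).astype(float)  # stand-in grayscale image
params = Compressor.compress_image(img,
                                   block_size=4,
                                   step_size=2,
                                   spatial_factor=2,
                                   intensity_shrinkage=0.75,
                                   verbosity=0)
print("{0} range blocks encoded for a {1}x{2} trimmed image".format(
    len(params['codes']), params['img_ht'], params['img_wd']))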
Example #33
        accuracy = np.mean((predictions > 0.5) == y_t)
        conf_matrix = pd.crosstab(y_t,
                                  predictions,
                                  rownames=['Actual'],
                                  colnames=['Predicted'])
        return accuracy, conf_matrix, self.c_


# Test usage
if 1:
    """
  Sigmoid prediction accuracy: 0.720
  RelU prediction accuracy: 0.750
  TanH prediction accuracy: 0.710
  """
    ut = Utilities()

    # Pulling the data into a DataFrame
    data = pd.read_csv('student_data.csv')

    # One-hot encode the rank column
    processed_data = ut.one_hot_encoder(data, "rank")

    # Scaling the columns
    processed_data['gre'] = processed_data['gre'] / 800
    processed_data['gpa'] = processed_data['gpa'] / 4.0

    # Split the data 2/3 train and 1/3 test
    train_data, test_data = ut.test_train_split(processed_data)

    # Splitting inputs and labels
Example #34
    def setup(self):
        """ Setups the GAN """
        # TODO new method  called from init opt passed

        print("Attack type: " + self.attack_type)

        conn = SQLConnector()
        data = conn.pull_kdd99(attack=self.attack_type, num=5000)
        dataframe = pd.DataFrame.from_records(
            data=data, columns=conn.pull_kdd99_columns(allQ=True))

        # ==========
        # ENCODING
        # ==========
        # https://stackoverflow.com/questions/24458645/label-encoding-across-multiple-columns-in-scikit-learn

        d = defaultdict(LabelEncoder)

        # Splitting the data into features and labels. Want labels to be consistent with evaluator encoding, so
        # we use the utils attacks_to_num function
        features = dataframe.iloc[:, :41]
        attack_labels = dataframe.iloc[:, 41:]

        for i in range(0, attack_labels.size):
            attack_labels.at[i, 'attack_type'] = util.attacks_to_num(
                attack_labels.at[i, 'attack_type'])

        features = features.apply(
            lambda x: d[x.name].fit_transform(x))  # fit is encoded dataframe

        # feature scaling, recommended by the github implementation
        self.scaler = MinMaxScaler(feature_range=(-1, 1))
        scaled_features = self.scaler.fit_transform(features.astype(float))
        scaled_df = pd.DataFrame(data=scaled_features)

        # Join the separately encoded sections back into one dataframe
        dataframe = scaled_df.join(attack_labels)
        dataset = dataframe.values  # transform to ndarray
        print(dataset)

        # TODO: Feature scaling? May be necessary. Has to be on a per-feature basis?

        # Splitting up the evaluation dataset. Should maybe be moved?
        eval_dataset = pd.read_csv('PortsweepAndNonportsweep.csv', header=None)
        eval_dataset = eval_dataset.values

        self.eval_dataset_X = eval_dataset[:, 0:41].astype(int)
        self.eval_dataset_Y = eval_dataset[:, 41]

        validationToTrainRatio = 0.05
        validationSize = int(validationToTrainRatio * len(self.eval_dataset_X))
        self.eval_validation_data = self.eval_dataset_X[:validationSize]
        self.eval_validation_labels = self.eval_dataset_Y[:validationSize]
        self.eval_dataset_X = self.eval_dataset_X[validationSize:]
        self.eval_dataset_Y = self.eval_dataset_Y[validationSize:]

        testToTrainRatio = 0.05
        testSize = int(testToTrainRatio * len(self.eval_dataset_X))
        self.eval_test_data = self.eval_dataset_X[:testSize]
        self.eval_test_labels = self.eval_dataset_Y[:testSize]
        self.eval_dataset_X = self.eval_dataset_X[testSize:]
        self.eval_dataset_Y = self.eval_dataset_Y[testSize:]

        # to visually judge encoded dataset
        print("Real encoded " + self.attack_type + " attacks:")
        print(dataset[:1])

        # Set X as our input data and Y as our label
        self.X_train = dataset[:, 0:41].astype(float)
        Y_train = dataset[:, 41]

        # labels for data. 1 for valid attacks, 0 for fake (generated) attacks
        self.valid = np.ones((self.batch_size, 1))
        self.fake = np.zeros((self.batch_size, 1))
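The encoding step above relies on the defaultdict(LabelEncoder) pattern from the linked Stack Overflow answer: one LabelEncoder per column, keyed by column name, fitted in a single apply() call. A self-contained toy illustration with made-up values (independent of the KDD99 data):

from collections import defaultdict

import pandas as pd
from sklearn.preprocessing import LabelEncoder

df = pd.DataFrame({"proto": ["tcp", "udp", "tcp"], "flag": ["SF", "S0", "SF"]})
encoders = defaultdict(LabelEncoder)

# Each column gets its own encoder; fit_transform runs column by column.
encoded = df.apply(lambda col: encoders[col.name].fit_transform(col))
print(encoded)

# The per-column encoders can later be reused, e.g. to invert one column:
print(encoders["proto"].inverse_transform(encoded["proto"]))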
Example #35
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
import numpy as np
import pandas as pd
from utilities import Utilities

util = Utilities()
[training_set_X, training_set_Y] = util.read_input_data()

Y = training_set_Y[0:100]


def sanitize_data():
    X = []
    for i in range(0, 100):
        conversation_with_tags = training_set_X[i]
        conversation = util.remove_tags(conversation_with_tags)
        conversation = util.remove_punctuation(conversation)
        conversation = conversation.lower().split(
        )  #sets everything to lowercase and splits on the spaces by default
        #conversation = util.stem(conversation) #stemming (taking the root of the word)
        conversation = util.lemmatize(conversation)
        X.append(conversation)
    return X, Y
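sanitize_data chains tag removal, punctuation stripping, lower-casing and lemmatisation through the project's Utilities helpers. The sketch below expresses roughly the same steps directly with the standard library and nltk; the exact behaviour of the Utilities methods may differ:

import re
import string

from nltk.stem import WordNetLemmatizer  # requires the 'wordnet' corpus to be downloaded

lemmatizer = WordNetLemmatizer()

def sanitize(text):
    text = re.sub(r"<[^>]+>", " ", text)                              # drop mark-up tags
    text = text.translate(str.maketrans("", "", string.punctuation))  # strip punctuation
    tokens = text.lower().split()                                     # lower-case, split on whitespace
    return [lemmatizer.lemmatize(token) for token in tokens]

print(sanitize("<p>The cats were running!</p>"))  # ['the', 'cat', 'were', 'running']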
Example #36
 def __init__(self):
     """Constructor
     """
     self._utilities = Utilities()
Example #37
class CommentLevelEvaluation:
    def __init__(self):

        self.data_file = 'mmh_dataset.csv'
        self.utilities = Utilities()
        # self.Processor = Processor()

        self.storage_path = 'comment-level-datasets-2/'
        # self.storage_path = 'r-combine-outputs/'
        self.random_states = [111, 122, 133, 144, 155]

    def generate_datasets(self, dataset_initial):
        X = self.utilities.read_from_csv(self.data_file)
        y = [0] * len(X)  # fake labels
        for random_state in self.random_states:
            X_train, X_test, y_train, y_test = tts(X,
                                                   y,
                                                   test_size=0.2,
                                                   random_state=random_state)

            for row in X_test:
                row[0] = row[0].replace('**$**', "")

            self.utilities.save_list_as_csv(
                X_train, self.storage_path + dataset_initial + '_train_' +
                str(random_state) + '.csv')
            self.utilities.save_list_as_csv(
                X_test, self.storage_path + dataset_initial + '_test_' +
                str(random_state) + '.csv')

    def run_experiment(self, dataset_initial):
        for random_state in self.random_states:
            X_train = self.storage_path + dataset_initial + '_train_' + str(
                random_state) + '.csv'
            X_test = self.storage_path + dataset_initial + '_test_' + str(
                random_state) + '.csv'

            settings = {
                'training_file':
                X_train,
                'data_file':
                X_test,
                'max_reviews':
                None,  # Options: 0 to any integer | default: None (all)
                'output_file':
                self.storage_path + dataset_initial + '_output_' +
                str(random_state) + '.csv'
            }

            processor = Processor(settings=settings)
            processor.run()

    def merge_aspect_classes(self, aspects):
        group_1 = ['staff attitude and professionalism', 'communication']
        group_2 = ['care quality', 'resource', 'process']
        group_3 = ['environment', 'food', 'parking']
        group_4 = ['waiting time']
        group_5 = ['other', 'noise']
        groups = [group_1, group_2, group_3, group_4, group_5]
        new_aspects = []
        for aspect in aspects:
            for group in groups:
                if aspect in group:
                    new_aspects.append(
                        group[0]
                    )  # all members will be replaced by the first member of the group
                    break
        return new_aspects

    def calculate_comment_level_scores_for_categories(self, y_test, y_pred):
        categories = []
        for aspects in y_test:
            categories = categories + aspects
        categories = list(set(categories))
        cat_scores = {}
        for category in categories:
            test_binary = []
            pred_binary = []

            for index, test_categories in enumerate(y_test):
                pred_categories = y_pred[index]
                if category in test_categories:
                    test_binary.append(1)
                else:
                    test_binary.append(0)

                if category in pred_categories:
                    pred_binary.append(1)
                else:
                    pred_binary.append(0)

            scores = {
                'precision': precision_score(test_binary, pred_binary),
                'recall': recall_score(test_binary, pred_binary),
                'f1-score': f1_score(test_binary, pred_binary)
            }

            cat_scores[category] = scores
        return cat_scores

    def calculate_comment_level_scores_for_categories_backup(
            self, y_test, y_pred):
        categories = []
        for aspects in y_test:
            categories = categories + aspects
        categories = list(set(categories))
        category_f_scores = {}
        for category in categories:
            true_positives = 0
            false_positives = 0
            false_negatives = 0
            true_negatives = 0

            for index, test_categories in enumerate(y_test):
                pred_categories = y_pred[index]

                if category in test_categories and category in pred_categories:
                    true_positives += 1
                elif category in test_categories and category not in pred_categories:
                    false_negatives += 1
                elif category not in test_categories and category in pred_categories:
                    false_positives += 1
                else:
                    true_negatives += 1

            # print [true_positives, false_positives, false_negatives, true_negatives]
            if float(true_positives + false_positives) > 0:
                precision = true_positives / float(true_positives +
                                                   false_positives)
            else:
                precision = 0

            if float(true_positives + false_negatives) > 0:
                recall = true_positives / float(true_positives +
                                                false_negatives)
            else:
                recall = 0

            f_score = (2 * precision * recall) / (
                precision + recall) if precision + recall > 0 else 0
            category_f_scores[category] = f_score

        return category_f_scores

    def calculate_accuracy(self, dataset_initials):
        overall_precisions = []
        overall_recalls = []
        overall_f1_scores = []

        envs = []
        wts = []
        saaps = []
        cqs = []
        ots = []
        for random_state in self.random_states:
            X_test = self.utilities.read_from_csv(self.storage_path +
                                                  dataset_initials + '_test_' +
                                                  str(random_state) + '.csv')
            X_pred = self.utilities.read_from_csv('r-combine-outputs/' +
                                                  dataset_initials +
                                                  '_combined_confidence_' +
                                                  str(random_state) + '.csv')

            y_test = []
            y_pred = []
            for index, row in enumerate(X_test):
                del row[0]
                aspects = []
                for item in row:
                    if item:
                        aspects.append(item.rsplit(' ', 1)[0])
                y_test.append(list(set(self.merge_aspect_classes(aspects))))

                predicted_row = X_pred[index]

                del predicted_row[0]
                aspects = []
                for item in predicted_row:
                    if item:
                        aspects.append(item)
                y_pred.append(list(set(aspects)))

            true_positives = 0
            false_positives = 0
            false_negatives = 0
            true_negatives = 0

            for index, test in enumerate(y_test):
                pred = y_pred[index]

                pred_minus_test = [item for item in pred if item not in test]
                test_minus_pred = [item for item in test if item not in pred]

                if len(pred_minus_test) == 0 and len(test_minus_pred) == 0:
                    true_positives += 1
                # elif len(pred_minus_test) > 0 and len(test_minus_pred) == 0:
                elif len(pred_minus_test) > 0:
                    false_positives += 1
                # elif len(test_minus_pred) > 0 and len(pred_minus_test) == 0:
                elif len(test_minus_pred) > 0:
                    false_negatives += 1
                else:
                    true_negatives += 1

            precision = true_positives / float(true_positives +
                                               false_positives)
            recall = true_positives / float(true_positives + false_negatives)

            overall_f1_score = (2 * precision * recall) / (precision + recall)
            overall_accuracy = (true_positives + true_negatives) / float(
                len(y_test))

            #print overall_accuracy

            overall_precisions.append(precision)
            overall_recalls.append(recall)
            overall_f1_scores.append(overall_f1_score)

            category_scores = self.calculate_comment_level_scores_for_categories(
                y_test, y_pred)
            score_name = 'f1-score'
            envs.append(category_scores['environment'][score_name])
            wts.append(category_scores['waiting time'][score_name])
            saaps.append(category_scores['staff attitude and professionalism']
                         [score_name])
            cqs.append(category_scores['care quality'][score_name])
            ots.append(category_scores['other'][score_name])
        # print overall_precisions
        precision = sum(overall_precisions) / float(len(overall_precisions))
        recall = sum(overall_recalls) / float(len(overall_recalls))
        f1_score = sum(overall_f1_scores) / float(len(overall_f1_scores))
        environment = sum(envs) / float(len(envs))
        waiting_time = sum(wts) / float(len(wts))
        staff_attitude = sum(saaps) / float(len(saaps))
        care_quality = sum(cqs) / float(len(cqs))
        other = sum(ots) / float(len(ots))
        #print "precision\trecall\tf1_score\tenvironment\twaiting_time\tstaff_attitude\tcare_quality\tother"
        print '%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f' % (
            precision, recall, f1_score, environment, waiting_time,
            staff_attitude, care_quality, other)

    def calculate_per_system_accuracy(self, dataset_initials):
        overall_precisions = []
        overall_recalls = []
        overall_f1_scores = []

        envs = []
        wts = []
        saaps = []
        cqs = []
        ots = []
        for random_state in self.random_states:
            X_test = self.utilities.read_from_csv(self.storage_path +
                                                  dataset_initials + '_test_' +
                                                  str(random_state) + '.csv')

            # system A output
            # X_pred = self.utilities.read_from_csv(self.storage_path + dataset_initials + '_output_' + str(random_state) + '.csv')

            # system B output
            X_pred = self.utilities.read_from_csv('r-combine-outputs/' +
                                                  dataset_initials +
                                                  '_output_confidence_' +
                                                  str(random_state) + '.csv')

            y_test = []
            y_pred = []
            for index, row in enumerate(X_test):
                del row[0]
                aspects = []
                for item in row:
                    if item:
                        aspects.append(item.rsplit(' ', 1)[0])
                        # aspects.append(item)
                y_test.append(list(set(self.merge_aspect_classes(aspects))))
                predicted_row = X_pred[index]

                del predicted_row[0]
                aspects = []
                for item in predicted_row:
                    if item:
                        aspects.append(item.rsplit(' ', 1)[0])
                        # aspects.append(item)
                y_pred.append(list(set(aspects)))
            true_positives = 0
            false_positives = 0
            false_negatives = 0
            true_negatives = 0

            for index, test in enumerate(y_test):
                pred = y_pred[index]

                pred_minus_test = [item for item in pred if item not in test]
                test_minus_pred = [item for item in test if item not in pred]

                if len(pred_minus_test) == 0 and len(test_minus_pred) == 0:
                    true_positives += 1
                # elif len(pred_minus_test) > 0 and len(test_minus_pred) == 0:
                elif len(pred_minus_test) > 0:
                    false_positives += 1
                # elif len(test_minus_pred) > 0 and len(pred_minus_test) == 0:
                elif len(test_minus_pred) > 0:
                    false_negatives += 1
                else:
                    true_negatives += 1

            precision = true_positives / float(true_positives +
                                               false_positives)
            recall = true_positives / float(true_positives + false_negatives)

            overall_f1_score = (2 * precision * recall) / (precision + recall)
            overall_accuracy = (true_positives + true_negatives) / float(
                len(y_test))

            #print overall_accuracy

            overall_precisions.append(precision)
            overall_recalls.append(recall)
            overall_f1_scores.append(overall_f1_score)

            category_scores = self.calculate_comment_level_scores_for_categories(
                y_test, y_pred)
            score_name = 'f1-score'
            envs.append(category_scores['environment'][score_name])
            wts.append(category_scores['waiting time'][score_name])
            saaps.append(category_scores['staff attitude and professionalism']
                         [score_name])
            cqs.append(category_scores['care quality'][score_name])
            ots.append(category_scores['other'][score_name])

        precision = sum(overall_precisions) / float(len(overall_precisions))
        recall = sum(overall_recalls) / float(len(overall_recalls))
        f1_score = sum(overall_f1_scores) / float(len(overall_f1_scores))
        environment = sum(envs) / float(len(envs))
        waiting_time = sum(wts) / float(len(wts))
        staff_attitude = sum(saaps) / float(len(saaps))
        care_quality = sum(cqs) / float(len(cqs))
        other = sum(ots) / float(len(ots))
        #print "precision\trecall\tf1_score\tenvironment\twaiting_time\tstaff_attitude\tcare_quality\tother"
        print '%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f' % (
            precision, recall, f1_score, environment, waiting_time,
            staff_attitude, care_quality, other)
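The accuracy methods above count a comment as a true positive only when the predicted aspect set matches the gold set exactly, as a false positive when it contains extra aspects, and as a false negative when gold aspects are missing. A self-contained toy sketch of that bookkeeping (the CSV handling and the Processor pipeline are omitted):

def exact_match_counts(y_test, y_pred):
    # A comment is a true positive only when predicted and gold aspect sets match exactly.
    tp = fp = fn = tn = 0
    for test, pred in zip(y_test, y_pred):
        extra = [a for a in pred if a not in test]   # predicted but not in gold
        missed = [a for a in test if a not in pred]  # gold but not predicted
        if not extra and not missed:
            tp += 1
        elif extra:
            fp += 1
        elif missed:
            fn += 1
        else:
            tn += 1
    return tp, fp, fn, tn

y_test = [["waiting time"], ["environment", "food"], []]
y_pred = [["waiting time"], ["environment"], ["other"]]
print(exact_match_counts(y_test, y_pred))  # (1, 1, 1, 0)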
Example #38
def copyAllToMaster(nodeIP, sourcePath):

    ret_status = [
        True, "Step 1(of 18) - Check config files",
        "Step 2(of 18) - Check Directory for node",
        "Step 3(of 18) - Check directory for backup node",
        "Step 4(of 18) - check Home directory ",
        "Step 5(of 18) - Create Temp Folder",
        "Step 6(of 18) - Copy Main Node config to temp ",
        "Step 7(of 18) - Truncate Node Directory ",
        "Step 8(of 18) -Extract Node Files to Temp ",
        "Step 9(of 18) - Get Zip File Name",
        "Step 10(of 18) - Copy node files from temp to node directory",
        "Step 11(of 18) - Copy node Config file to node Directory",
        "Step 12(of 18) - Create Temp Folder",
        "Step 13(of 18) - Copy Backup Node config to temp ",
        "Step 14(of 18) - Truncate Backup Node Directory ",
        "Step 15(of 18) -Extract Backup Node Files to Temp ",
        "Step 16(of 18) - Get Zip File Name",
        "Step 17(of 18) - Copy Backup node files from temp to node directory",
        "Step 18(of 18) - Copy Backup node Config file to backup node Directory"
    ]
    print("\n Running startBackupNodeExecution() \n")

    try:

        backup_node_ip = nodestatus.getBackupNodeIp(nodeIP)
        if (backup_node_ip == ""):
            ret_status[0] = False
            ret_status.append(
                'ERROR CODE 2002: No Backup Node Ip found for Node IP :' +
                nodeIP)
            return ret_status
        ret_status[1] = ret_status[1] + " :Done"
        sourcePathList1 = sourcePath.split(os.sep)
        sourcePathList2 = sourcePathList1[len(sourcePathList1) - 1].split('.')
        sourcePathFileName = sourcePathList2[0]
        if not os.path.isdir(".." + os.sep + "files" + os.sep + nodeIP):
            ret_status[0] = False
            ret_status.append('ERROR CODE 2001: Home Directory for node :' +
                              nodeIP + " doesn't exist")
            return ret_status
        elif not os.path.isdir(".." + os.sep + "files" + os.sep +
                               backup_node_ip):
            ret_status[0] = False
            ret_status[2] = ret_status[2] + " : Done"
            ret_status.append(
                'ERROR CODE 2001: Home Directory for backup node :' + nodeIP +
                " doesn't exist")
            return ret_status
        elif not os.path.isfile(sourcePath):
            ret_status[0] = False
            ret_status[2] = ret_status[2] + " : Done"
            ret_status[3] = ret_status[3] + " : Done"
            ret_status.append('ERROR CODE 2002: Invalid Source Path: :' +
                              sourcePath)
            return ret_status

        else:
            ret_status[2] = ret_status[2] + " : Done"
            ret_status[3] = ret_status[3] + " : Done"
            ret_status[4] = ret_status[4] + " : Done"

            ##########  Managing Main Node Copies ##########################
            utilities.createOrReplace(".." + os.sep + "temp" + os.sep)
            ret_status[5] = ret_status[5] + " : Done"

            shutil.move(
                ".." + os.sep + "files" + os.sep + nodeIP + os.sep +
                "configClient.xml",
                ".." + os.sep + "temp" + os.sep + "configClient.xml")
            ret_status[6] = ret_status[6] + " : Done"

            shutil.rmtree(".." + os.sep + "files" + os.sep + nodeIP)
            ret_status[7] = ret_status[7] + " : Done"

            zip_ref = zipfile.ZipFile(sourcePath, 'r')
            zip_ref.extractall(".." + os.sep + "temp")
            zip_ref.close()
            ret_status[8] = ret_status[8] + " : Done"

            source1 = sourcePath.split(os.sep)
            source2 = source1[len(source1) - 1]
            source3 = source2.split(".zip")[0]
            ret_status[9] = ret_status[9] + " : Done"

            shutil.copytree(".." + os.sep + "temp" + os.sep + source3,
                            ".." + os.sep + "files" + os.sep + nodeIP)
            ret_status[10] = ret_status[10] + " : Done"

            shutil.move(
                ".." + os.sep + "temp" + os.sep + "configClient.xml",
                ".." + os.sep + "files" + os.sep + nodeIP + os.sep +
                "configClient.xml")
            ret_status[11] = ret_status[11] + " : Done"

            ##########  Managing Backup Node Copies ##########################

            utilities.createOrReplace(".." + os.sep + "temp" + os.sep)
            ret_status[12] = ret_status[12] + " : Done"

            shutil.move(
                ".." + os.sep + "files" + os.sep + backup_node_ip + os.sep +
                "configClient.xml",
                ".." + os.sep + "temp" + os.sep + "configClient.xml")
            ret_status[13] = ret_status[13] + " : Done"

            shutil.rmtree(".." + os.sep + "files" + os.sep + backup_node_ip)
            ret_status[14] = ret_status[14] + " : Done"

            #utilities.zipdir(sourcePath,"../temp")
            zip_ref = zipfile.ZipFile(sourcePath, 'r')
            zip_ref.extractall(".." + os.sep + "temp")
            zip_ref.close()
            ret_status[15] = ret_status[15] + " : Done"

            source1 = sourcePath.split(os.sep)
            source2 = source1[len(source1) - 1]
            source3 = source2.split(".zip")[0]
            ret_status[16] = ret_status[16] + " : Done"

            shutil.copytree(".." + os.sep + "temp" + os.sep + source3,
                            ".." + os.sep + "files" + os.sep + backup_node_ip)
            ret_status[17] = ret_status[17] + " : Done"

            shutil.move(
                ".." + os.sep + "temp" + os.sep + "configClient.xml",
                ".." + os.sep + "files" + os.sep + backup_node_ip + os.sep +
                "configClient.xml")
            ret_status[18] = ret_status[18] + " : Done"

    except:
        ret_status[0] = False
        ret_status.append("\n**** Exception Occurred: " +
                          str(sys.exc_info()[1]) + str(traceback.print_exc()))

    print("\n Done \n")
    return ret_status
Example #39
 def _run_rkhunter(self):
     Avalon.info('Launching rkhunter')
     Utilities.execute(['rkhunter'],
                       std_in=sys.stdin,
                       std_out=sys.stdout,
                       std_err=sys.stderr)
Example #40
    def search_wrapper(client_defined_expand,
                       client_defined_goal_state_check,
                       client_defined_hashed_state,
                       client_defined_compute_state_cost=None,
                       start_state_hash=None,
                       start_state=None,
                       search_type="bfs",
                       debug=False):
        results = {
            "path_to_goal": None,
            "cost_of_path": 0,
            "nodes_expanded": 0,
            "search_depth": 0,
            "max_search_depth": 0,
            "running_time": 0,
            "max_ram_usage": 0
        }

        def update_stats(max_search_depth=None, increment_expanded=False):
            if (max_search_depth is not None
                    and results["max_search_depth"] < max_search_depth):
                results["max_search_depth"] = max_search_depth

            if (increment_expanded == True):
                results["nodes_expanded"] += 1

        # Wrapper to generate node
        def generate_node(node_options):
            return Node(**node_options)

        # Wrap the client_defined_expand and add stats
        def expand_with_stats(state, state_hash):
            children = client_defined_expand(state, state_hash)

            return children

        # Wrapper to track the cost (heuristic) of a given node -- only applicable in A-Star
        def compute_state_cost(state, state_hash):
            if (client_defined_compute_state_cost is not None):
                return client_defined_compute_state_cost(state, state_hash)
            else:
                return 1

        start_time = Utilities.get_current_time()

        node_solution = Algorithms.search(
            expand=expand_with_stats,
            goal_state_check=client_defined_goal_state_check,
            hashed_state=client_defined_hashed_state,
            generate_node=generate_node,
            compute_state_cost=compute_state_cost,
            update_stats=update_stats,
            start_state_hash=start_state_hash,
            start_state=start_state,
            search_type=search_type,
            debug=debug,
        )

        if (node_solution is not None):
            results["path_to_goal"] = Algorithms.get_node_path_to_root(
                node_solution)
            results["search_depth"] = len(results["path_to_goal"])
            results["cost_of_path"] = node_solution.cost

        end_time = Utilities.get_current_time()

        results["running_time"] = end_time - start_time
        max_ram_usage_in_bytes = resource.getrusage(
            resource.RUSAGE_SELF).ru_maxrss  # in bytes
        results["max_ram_usage"] = max_ram_usage_in_bytes / 1000000

        return results
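A hypothetical usage sketch for search_wrapper, treating it as a plain function for brevity and using a toy "count from 0 up to 3" space. The exact shape that Algorithms.search expects for expanded children is not visible in this snippet, so the (state, hash) pairs below are an assumption:

# Client-defined callbacks for a toy problem: reach 3 from 0 using +1 / +2 moves.
def expand(state, state_hash):
    return [(state + step, str(state + step)) for step in (1, 2) if state + step <= 3]

def is_goal(state, state_hash):
    return state == 3

def hash_state(state):
    return str(state)

results = search_wrapper(expand,          # client_defined_expand
                         is_goal,         # client_defined_goal_state_check
                         hash_state,      # client_defined_hashed_state
                         start_state=0,
                         start_state_hash="0",
                         search_type="bfs")
print(results["path_to_goal"], results["nodes_expanded"])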
Example #41
 def __init__(self):
     self.classifier = Classifier()
     self.dataset = 'bbc-dataset-500-rows.csv'
     self.utilities = Utilities()
Example #42
    def __init__(self,
                 num_to_keep=None,
                 user=None,
                 db_pwd=None,
                 target_db=None,
                 host=None,
                 tables=[],
                 logging_level=logging.INFO,
                 unittests=False):
        '''
        Most parameters have defaults that can be
        set once and for all. The main params that
        might change are num_to_keep, and tables.
        
        The num_to_keep integer value declares how many
        of the newest backup tables to keep for each table.
        
        The tables list of table names may contain a mix
        of table root names (e.g. AssignmentSubmission, Terms),
        and backup table names (e.g. Terms_2019_01_10_14_14_40_123456)
        For root names, all backup tables are collected and the 
        num_to_keep newest are retained. For backup table names
        only those specific tables are removed.
        
        @param num_to_keep: how many of the latest backup tables
            to retain for each aux table
        @type num_to_keep: int
        @param user: MySQL user for login
        @type user: str
        @param db_pwd: password for logging into MySQL. Don't use
            for security reasons. Instead, put the pwd into
            $HOME/.ssh/canvas_pwd
        @type db_pwd: str
        @param target_db: MySQL database where aux tables reside.
        @type target_db: str
        @param host: MySQL host name
        @type host: str
        @param tables: list of specific tables to consider. If None,
            backups for all aux tables are trimmed. 
        @type tables: [str]
        @param logging_level: how much information to provide during runtime
        @type logging_level: logging.loglevel
        @param unittests: whether this instantiation is from a unittest
        @type unittests: boolean
        '''

        # Get local configuration info:
        self.config_info = ConfigInfo()

        # Access to common functionality:
        self.utils = Utilities()

        if user is None:
            user = self.config_info.default_user

        if host is None:
            host = self.config_info.default_host

        if db_pwd is None:
            db_pwd = self.utils.get_db_pwd(host, unittests=unittests)
        elif db_pwd == True:
            db_pwd = self.utils.get_db_pwd(host,
                                           ask_user=True,
                                           unittests=unittests)

        if target_db is None:
            target_db = self.config_info.canvas_db_aux

        self.target_db = target_db

        if num_to_keep is None:
            self.num_to_keep = BackupRemover.default_num_backups_to_keep
        else:
            self.num_to_keep = num_to_keep

        # Better name for tables to consider removing:
        tables_to_consider = tables

        # Unittests expect a db name in self.db:
        self.db = target_db

        self.db_obj = self.utils.log_into_mysql(user,
                                                db_pwd,
                                                db=target_db,
                                                host=host)

        self.utils.setup_logging(logging_level)
        if unittests:
            self.db_name = target_db
            return

        # Get names of all tables in the target_db
        all_tables = self.utils.get_existing_tables_in_dir(self.db_obj,
                                                           return_all=True,
                                                           target_db=target_db)

        # If caller specified only specific tables/backup tables to
        # remove, weed out all table names not in caller's list:
        all_tables_to_consider = self.find_tables_to_consider(
            all_tables, tables_to_consider)

        self.remove_old_backups(all_tables_to_consider)
        self.close()
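Based on the constructor documentation above, a typical invocation might look like the sketch below; argument values are illustrative, and user, host, db_pwd and target_db fall back to the ConfigInfo / ~/.ssh/canvas_pwd defaults when omitted:

import logging
# from backup_remover import BackupRemover  # assumed module name

# Keep the two newest backups of AssignmentSubmission, and delete one
# specific dated Terms backup; the constructor does the work and closes
# its own connection.
BackupRemover(num_to_keep=2,
              tables=['AssignmentSubmission', 'Terms_2019_01_10_14_14_40_123456'],
              logging_level=logging.INFO)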
Example #43
class LoadHistoryLister(object):
    '''
    Reads table LoadLog. Lists date of latest
    refresh for each table. Lists missing tables,
    and list of all tables.
    '''

    load_table_name = 'LoadLog'
    
    #-------------------------
    # Constructor 
    #--------------

    def __init__(self, latest_only=False, unittests=False):
        '''
        Constructor
        '''
        config_info = ConfigInfo()
        self.utils  = Utilities()
        
        # For convenience:
        self.load_table_name = LoadHistoryLister.load_table_name
        if unittests:
            self.aux_db = 'Unittest'
        else:
            self.aux_db = config_info.canvas_db_aux
        
        # Get results as dictionaries:
        if unittests:
            self.db_obj = self.utils.log_into_mysql(config_info.test_default_user,
                                                    self.utils.get_db_pwd(config_info.test_default_host,
                                                                          unittests=unittests),
                                                    db=self.aux_db,
                                                    host=config_info.test_default_host,
                                                    cursor_class = Cursors.DICT
                                                    )
            # Let unittests call methods on their own:
            return
        else:
            self.db_obj = self.utils.log_into_mysql(config_info.default_user,
                                                    self.utils.get_db_pwd(config_info.default_host,
                                                                          unittests=unittests),
                                                    db=config_info.canvas_db_aux,
                                                    host=config_info.default_host,
                                                    cursor_class = Cursors.DICT
                                                    )
            
        try:
            success = self.print_latest_refresh(latest_only)
            if success:
                self.print_missing_tables()
            # self.backup_availability()
        finally:
            self.db_obj.close()
        
    #-------------------------
    # print_latest_refresh 
    #--------------
    
    def print_latest_refresh(self, 
                             latest_only=False, 
                             out_fd=sys.stdout, 
                             load_log_content=None):
        '''
        Pretty print a list of aux tables that exist in 
        the database.
        
        @param latest_only: if True, only the most recent refresh
            event for each table will be shown.
        @type latest_only: bool
        @param out_fd: if provided, a file-like object to which
            output is written. Default: stdout. Used by unittests,
            but could also be used to write the report to a file.
        @type out_fd: file-like
        @param load_log_content: a list of dicts reflecting the content
            of the LoadLog table. Only used by unittests!
        @type load_log_content: [{}]
        @return: True for success, False for failure
        @rtype: bool
        '''
        
        try:
            # Only read content of LoadLog table if
            # unittests did not pass in their own in
            # the call:
            # Result will be:
            #  [{tbl_name : <str>, num_rows : <int>, time_refreshed : datetime},
            #   {tbl_name : <str>, num_rows : <int>, time_refreshed : datetime},
            #        ..
            #  ]
            if load_log_content is None:
                self.utils.ensure_load_log_table_existence(self.load_table_name, self.db_obj)
                load_log_content = self.db_obj.query(f"SELECT * FROM {self.aux_db}.{self.load_table_name}")
        except ValueError as e:
            out_fd.write(f"Cannot list tables: {repr(e)}\n")
            return False
            
        # Pull all row-dicts out from the query result:
        tbl_dicts   = [tbl_dict for tbl_dict in load_log_content]
        
        # Sort the dicts by table name:
        sorted_tbl_dicts = sorted(tbl_dicts, key=lambda one_dict: one_dict['tbl_name'])
        
        out_fd.write(f"\nAux tables in {self.aux_db}:\n\n")
        
        tbl_nm_header    = 'Table Name'
        load_time_header = 'Last Refreshed'
        num_rows_header  = 'Num Rows'
        # Print the header:
        out_fd.write(f'{tbl_nm_header:>30} {load_time_header:^25} {num_rows_header:^5}\n')

        # If requested, only show the latest update
        # for each table:
        
        if latest_only:
            sorted_tbl_dicts = self.keep_latest_dict(sorted_tbl_dicts)
        
        # For each result dict, pull out the table name,
        # time refreshed, and number of rows. Assign them
        # to variables:
        
        for tbl_entry_dict in sorted_tbl_dicts:
            tbl_nm       = tbl_entry_dict['tbl_name']
            num_rows     = tbl_entry_dict['num_rows']
            
            # Get a UTC datetime obj (b/c we initialize
            # each MySQL session to be UTC):
            utc_load_datetime = tbl_entry_dict['time_refreshed']
            
            # Tell this 'unaware' datetime obj that it's in UTC:
            tz_aware_load_datetime = utc_load_datetime.replace(tzinfo=timezone.utc)
            localized_datetime = tz_aware_load_datetime.astimezone(tz=None)
            load_time_str = localized_datetime.strftime("%Y-%m-%d %H:%M:%S %Z")

            # The ':>30' means "right-justify; allow 30 chars".
            # The ':^25' means "center-justify; allow 25 chars".
            out_fd.write(f"{tbl_nm:>30}   {load_time_str:^25} {num_rows:^5}\n")
          
        return True
      
    #-------------------------
    # keep_latest_dict
    #--------------
    
    def keep_latest_dict(self, load_event_dicts):
        '''
        Given a list of dicts with table-name, load-date,
        and row num keys, return a new list with only the
        dicts that describe the most recent table refresh.
        
        @param load_event_dicts: array of dict describing table
            refresh events.
        @type load_event_dicts: [{}]
        '''
        # Dict {tbl_name : load_event_dict} to hold
        # the most recent dict for the respective table.
        # Use an ordered dict to not mess up order of
        # passed-in dicts:
         
        latest_dicts = OrderedDict()
        for load_event_dict in load_event_dicts:
            tbl_nm = load_event_dict['tbl_name']
            try:
                if load_event_dict['time_refreshed'] > latest_dicts[tbl_nm]['time_refreshed']:
                    latest_dicts[tbl_nm] = load_event_dict
            except KeyError:
                # First time we see an entry for this table:
                latest_dicts[tbl_nm] = load_event_dict
        
        res = [newest_refresh_dict for newest_refresh_dict in latest_dicts.values()]
        return res        
      
    #-------------------------
    # print_missing_tables 
    #--------------

    def print_missing_tables(self, num_cols=4):
        '''
        Print the tables that are missing in the 
        aux tables database. Print in column form,
        alpha sorted.
        
        @param num_cols: number of table names in one row
        @type num_cols: int
        @return: True for success, False for failure
        @rtype: bool
        '''

        all_tables     = set(self.utils.create_table_name_array())
        tables_present = self.utils.get_tbl_names_in_schema(self.db_obj, self.aux_db)
        tables_present = set([table_dict['TABLE_NAME'] for table_dict in tables_present])

        missing_tables = all_tables - tables_present
        if len(missing_tables) == 0:
            print("No missing tables.")
            return True

        self.utils.print_columns(missing_tables, 'Missing Tables:', num_cols=num_cols, alpha=True)        
         
        return True    
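
# --- Illustrative sketch (not part of the example above) ---
# Standalone demo of two techniques used in print_latest_refresh():
# (1) turning a timezone-naive UTC datetime into local time, and
# (2) f-string column alignment for the report rows.
# The sample table name and row count are made up for illustration.
from datetime import datetime, timezone

utc_naive = datetime(2020, 3, 1, 17, 30, 0)          # as stored in MySQL (UTC)
utc_aware = utc_naive.replace(tzinfo=timezone.utc)    # declare it UTC
local_dt = utc_aware.astimezone(tz=None)              # convert to local time
load_time_str = local_dt.strftime("%Y-%m-%d %H:%M:%S %Z")

tbl_nm, num_rows = 'Terms', 1234
print(f"{tbl_nm:>30}   {load_time_str:^25} {num_rows:^5}")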
Example #44
0
                        help='Save data to CSV')
    parser.add_argument('-wimages',
                        action='store_true',
                        default=False,
                        dest='wimages',
                        help='Save images in query folder')
    arguments = parser.parse_args()

    if arguments.dataset:
        datasetPath = arguments.dataset
    else:
        parser.print_help()
        print("-dataset <datasetPath>")
        sys.exit(1)

    util = Utilities()

    # experiment directory
    expDir = "sift_experiments"
    qBuildings = ['22', '39', '60']
    qidx = 0
    ## Prepare Dataset ##
    dataset, queryList = util.createDataset(datasetPath, qBuildings)
    # creating result lists & house rank list of (<image>, #inliers, %inliers)
    resList = np.zeros(len(dataset), [('idx', 'int16'), ('imageId', 'a28'),
                                      ('inliers', 'int16'),
                                      ('percent', 'float')])
    rankedClassList = np.zeros(15, [('idx', 'int16'), ('imageId', 'a28'),
                                    ('inliers', 'int16'), ('percent', 'float'),
                                    ('building', 'int8')])
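
# --- Illustrative sketch (not part of the example above) ---
# Minimal demo of the NumPy structured-array pattern used for resList above:
# np.zeros() with a list of (field_name, dtype) pairs creates a record-style
# array whose fields can be read and written by name. 'S28' is the bytes
# dtype that the 'a28' alias in the example refers to; the sample row is
# made up for illustration.
import numpy as np

res = np.zeros(3, [('idx', 'int16'), ('imageId', 'S28'),
                   ('inliers', 'int16'), ('percent', 'float')])
res[0] = (0, b'building_22_img_001.jpg', 57, 0.83)
print(res['imageId'][0], res['inliers'][0], res['percent'][0])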
Example #45
0
class Bot:
    """Bot object each group will have that handle checking for commands and processing them"""
    def __init__(self,
                 group,
                 yt_key=None,
                 delim="$",
                 refresh_group_interval=600):
        """
        :param group: the group this object will read messages from
        :param yt_key: youtube api key. need it to use yt_search but not needed for other commands
        :param delim: the first character that will let the bot know it is a command. default is "$"
        """
        self.group = group
        self.delim = delim
        self.ult = Utilities(yt_key)
        self.tags = Tags(group.name, group.id, group.members)
        self.valid_commands = ["avatar", "git", "yt", "tag", "help"]
        Timer(refresh_group_interval, self.reload_group).start()

    def get_message(self):
        """
        :return: returns the latest message from a group. if there is an error, return None
        """
        try:
            return self.group.messages.list()[0]
        except Exception as err:
            Utilities.log(
                "Exception: {self.group.name}: bot.get_message: {err}")
            return None

    def reload_tags(self):
        """
        :return: reloads the tags in the Tag object
        """
        self.tags.reload_tags()

    def reload_group(self, stop=Event()):
        """
        :param stop: threading Event. not set by default so this method would be called every 10 minutes
        :return: updates the group name, group id, and group members of a group every 10 minutes
        """
        self.group.refresh_from_server()
        self.tags.update_members(self.group.members)
        self.tags.update_group_id(self.group.id)
        self.tags.update_group_name(self.group.name)
        self.tags.save_tags()

        if not stop.is_set():
            Timer(600, self.reload_group).start()

    def save_tags(self):
        """
        :return: writes the tags to a file for the group
        """
        self.tags.save_tags()

    def find_owner_name(self, user_id):
        """
        :param user_id: user_id of a member in the group
        :return: returns the nickname associated with the user_id
        """
        return list(filter(lambda x: x.user_id == user_id,
                           self.group.members))[0]

    def find_avatar(self, message, mentions):
        """
        :param message: the avatar command. checks to see if it's a help call or actual usage
        :param mentions: list of attachments with the message. uses it to check for mentions
        :return: avatar url of person mentioned or an error message saying to mention the user
        """
        if message[2] == "help":
            return "Usage: avatar [person]"
        else:
            mentions = list(filter(lambda x: x.type == "mentions", mentions))
            if len(mentions) == 1:
                user_id = mentions[0].user_ids[0]
                return self.find_owner_name(user_id).image_url
            return "Please mention the one person you want the avatar of"

    def send_message(self, message):
        """
        :param message: message that will be sent to the group
        :return: message should post in the group
        """
        try:
            if isinstance(message, list):
                for res in message:
                    self.group.post(res)
            else:
                self.group.post(message)
        except Exception as err:
            Utilities.log(
                f"Exception: {self.group.name}: bot.send_message: {err}")

    def process_message(self, message):
        """
        :param message: checks if the message is a valid command and executes the command it is associated with
        :return: results of the command executed
        """
        if message is not None:
            try:
                message_text = message.text.lower()
                delim = message_text[:len(self.delim)]
                message_text = message_text[len(self.delim):]
                message_text = message_text.split(" ")
                command = message_text[0]

                if delim == self.delim and command in self.valid_commands:
                    user_id = message.user_id
                    owner = self.find_owner_name(user_id)
                    Utilities.log(
                        f"{self.group.name}: Processing from {owner}: {message_text}, Command: {command}"
                    )
                    result = None
                    if command == "help":
                        result = self.ult.post_help()
                    if command == "avatar":
                        result = self.find_avatar(message_text,
                                                  message.attachments)
                    if command == "git":
                        result = self.ult.git()
                    if command == "yt":
                        query = ' '.join(message_text[1:])
                        result = self.ult.yt_search(query)
                    if command == "tag":
                        result = self.tags.parse_commands(
                            message_text, user_id, message.attachments)

                    if result is not None:
                        Utilities.log(
                            f"{self.group.name}: posting \"{result}\"")
                        self.send_message(result)
            except Exception as err:
                if isinstance(err, googleapiclient.errors.HttpError):
                    self.send_message(str(err))
                if message.text is None:
                    pass
                else:
                    Utilities.log(
                        f"{self.group.name}: bot.process_message: {err}")
Example #46
0
    def generateAnswer(choice):
        '''
        This function takes a choice as input, loads the corresponding model,
        and uses the loaded model to predict the answer and the attention weights.

        Parameters:
        choice (str) : It can be either 'single' or 'double'

        Returns:
        story (list) : A list of sentences in the story
        question (str) : The question
        correct_answer (str) : The correct answer
        weights1 (numpy array) : The array of weights for outer hop
        weights2 (numpy array) : The array of weights of inner hop
        predicted_answer (str) : The answer predicted by the model
        '''
        tar = tarfile.open('Data/babi_tasks_1-20_v1-2.tar.gz')

        challenges = {
          # QA1 with 10,000 samples
          'single_supporting_fact_10k': 'tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_{}.txt',
          # QA2 with 10,000 samples
          'two_supporting_facts_10k': 'tasks_1-20_v1-2/en-10k/qa2_two-supporting-facts_{}.txt',
        }

        if choice == 'single':
            ## Single Supporting Fact Challenge
            ss_train_stories, ss_test_stories, \
                ss_stories_train, ss_questions_train, ss_answers_train, \
                ss_stories_test, ss_questions_test, ss_answers_test, \
                ss_story_maxlen, ss_story_maxsents, ss_question_maxlen, \
                ss_vocab, ss_vocab_size, ss_word2idx = \
                Preprocess.getData(challenges['single_supporting_fact_10k'], tar)

            ss_idx2word = {value : key for key, value in ss_word2idx.items()}

            single_model = Utilities.loadModel('single_model')
            single_debug_model = Utilities.loadModel('single_debug_model')

            story, question, correct_answer, weights2, predicted_answer = \
                Models.predictSingleModelAnswer(ss_test_stories, ss_stories_test, ss_questions_test, ss_idx2word, single_model, single_debug_model)
            weights1 = np.zeros(weights2.shape)

            K.clear_session()

            return story, question, correct_answer, weights1, weights2, predicted_answer

        else:
            ## Two Supporting Fact challenge
            ts_train_stories, ts_test_stories, \
                ts_stories_train, ts_questions_train, ts_answers_train, \
                ts_stories_test, ts_questions_test, ts_answers_test, \
                ts_story_maxlen, ts_story_maxsents, ts_question_maxlen, \
                ts_vocab, ts_vocab_size, ts_word2idx = \
                Preprocess.getData(challenges['two_supporting_facts_10k'], tar)

            ts_idx2word = {value : key for key, value in ts_word2idx.items()}

            double_model = Utilities.loadModel('double_model')
            double_debug_model = Utilities.loadModel('double_debug_model')

            story, question, correct_answer, weights1, weights2, predicted_answer = \
                Models.predictDoubleModelAnswer(ts_test_stories, ts_stories_test, ts_questions_test, ts_idx2word, double_model, double_debug_model)

            K.clear_session()

            return story, question, correct_answer, weights1, weights2, predicted_answer
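
# --- Illustrative sketch (not part of the example above) ---
# Small demo of the vocabulary inversion used above (word2idx -> idx2word),
# which lets a predicted class index be mapped back to its token.
# The toy vocabulary is made up for illustration.
word2idx = {'<pad>': 0, 'mary': 1, 'went': 2, 'kitchen': 3}
idx2word = {value: key for key, value in word2idx.items()}
predicted_index = 3
print(idx2word[predicted_index])   # kitchen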
Example #47
0
import pandas as pd
from pandas import DataFrame
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression

wine_white_results = open('wine_white_results.txt', 'w')
wine_red_results = open('wine_red_results.txt', 'w')
# read data into a DataFrame
data = pd.read_csv("winequality-white.csv", delimiter=';')
data2 = pd.read_csv("winequality-red.csv", delimiter=';')

X = data[[
    "fixed acidity", "volatile acidity", "citric acid", "residual sugar",
    "chlorides", "free sulfur dioxide", "total sulfur dioxide", "density",
    "pH", "sulphates", "alcohol"
]]
y = data[["quality"]]
_indent = Utilities.draw_whatever("-", 6)

#calculate correlation matrix
corMat = DataFrame(data.iloc[:, :11].corr())
wine_white_results.writelines(_indent + 'correlation matrix' + _indent + '\n')
wine_white_results.writelines(str(corMat) + '\n')
# instantiate a logistic regression model, and fit with X and y
model = LinearRegression()
model = model.fit(X, y.values.ravel())

# check the accuracy on the training set
score = model.score(X, y)
wine_white_results.write("accuracy :" + str(score) + '\n')
# print intercept and coefficients
wine_white_results.write("intercept_ :" + str(model.intercept_) + '\n')
Example #48
0
import os, sys, vtk
import numpy as np

script_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(script_path, '../src'))

from utilities import Utilities

rescale_path = '/work/lpzmateo/data/DL_shapes/shapes/no_features_no_groups/rescaled'
csv_path = '/work/lpzmateo/data/DL_shapes/shapes/no_features_no_groups/dataset_description.csv'

util = Utilities()

csv_dict = util.readDictCSVFile(csv_path)
print('getting params')
min_coord = None
max_coord = None
for path in csv_dict['VTK Files']:
    reader = vtk.vtkPolyDataReader()
    reader.SetFileName(path)
    reader.Update()
    polydata = reader.GetOutput()

    for i in range(polydata.GetNumberOfPoints()):
        ptn = polydata.GetPoint(i)
        for n in ptn:
            if (min_coord is None):
                min_coord = n
            elif (min_coord > n):
                min_coord = n
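
# --- Illustrative sketch (not part of the example above) ---
# The loop above tracks the global minimum coordinate over every point of
# every mesh; the same scan also yields the maximum. A standalone version
# over plain point tuples (no VTK needed); the points are made up for
# illustration.
points = [(0.5, -1.2, 3.0), (2.5, 0.0, -0.7)]
min_coord = None
max_coord = None
for ptn in points:
    for n in ptn:
        if min_coord is None or n < min_coord:
            min_coord = n
        if max_coord is None or n > max_coord:
            max_coord = n
print(min_coord, max_coord)   # -1.2 3.0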
Example #49
0
    def train(self):
        """ Trains the GAN system """
        # break condition for training (when diverging)
        loss_increase_count = 0
        prev_g_loss = 0

        conn = SQLConnector()

        idx = np.arange(self.batch_size)

        for epoch in range(self.max_epochs):
            #selecting batch_size random attacks from our training data
            #idx = np.random.randint(0, X_train.shape[0], batch_size)
            attacks = self.X_train[idx]

            # generate a matrix of noise vectors
            noise = np.random.normal(0, 1, (self.batch_size, 41))

            # create an array of generated attacks
            gen_attacks = self.generator.predict(noise)

            # loss functions, based on what metrics we specify at model compile time
            c_loss_real = self.critic.train_on_batch(attacks, self.valid)
            c_loss_fake = self.critic.train_on_batch(gen_attacks, self.fake)
            d_loss = 0.5 * np.add(c_loss_real, c_loss_fake)

            for l in self.critic.layers:
                weights = l.get_weights()
                weights = [
                    np.clip(w, -self.clip_value, self.clip_value)
                    for w in weights
                ]
                l.set_weights(weights)

            # generator loss function
            g_loss = self.gan.train_on_batch(noise, self.valid)

            if epoch % 500 == 0:
                print(
                    "%d [D loss: %f, acc.: %.2f%%] [G loss: %f] [Loss change: %.3f, Loss increases: %.0f]"
                    % (epoch, d_loss[0], 100 * d_loss[1], g_loss,
                       g_loss - prev_g_loss, loss_increase_count))

        gen_attacks = self.scaler.inverse_transform(gen_attacks)
        predicted_gen_attack_labels = self.evaluator.predict(
            gen_attacks).transpose().astype(int)
        gen_attack_labels = np.full(predicted_gen_attack_labels.shape, 1)

        print("Generated attack labels: ")
        print(gen_attack_labels)
        print("Predicted labels of generated attacks: ")
        print(predicted_gen_attack_labels)

        right = (predicted_gen_attack_labels == 1).sum()
        wrong = (predicted_gen_attack_labels != 1).sum()

        accuracy = (right / float(right + wrong))

        print("5 generated attacks: ")
        print(gen_attacks[:5, :])
        print()
        print("Accuracy of evaluator on generated data: %.4f " % accuracy)
        if accuracy > .50:
            conn.write_gens(gen_attacks, util.attacks_to_num(self.attack_type))

        layersstr = str(self.generator_layers[0]) + "," + str(
            self.generator_layers[1]) + "," + str(self.generator_layers[2])
        attack_num = util.attacks_to_num(self.attack_type)

        conn.write_hypers(layerstr=layersstr,
                          attack_encoded=attack_num,
                          accuracy=accuracy)
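
# --- Illustrative sketch (not part of the example above) ---
# The critic's per-layer weight clipping above is the standard WGAN trick:
# after each update, every weight is clamped into [-clip_value, clip_value].
# The NumPy core of it, without Keras; the sample weights are made up.
import numpy as np

clip_value = 0.01
weights = [np.array([[0.5, -0.02], [0.003, -0.6]]), np.array([0.2, -0.005])]
clipped = [np.clip(w, -clip_value, clip_value) for w in weights]
print(clipped[0])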
Example #50
0
    def driver():
        TimerUtility.start_timer('drv_driver')
        name = "_cumcolor_urban_"
        output_dir = Scenario.get_scen_value("output_dir")
        landuse_flag = len(Scenario.get_scen_value("landuse_data_file")) > 0
        nrows = IGrid.nrows
        ncols = IGrid.ncols
        total_pixels = IGrid.get_total_pixels()
        z_cumulate = PGrid.get_cumulate()
        sim_landuse = PGrid.get_land1()

        # Create Annual Landuse Probability File
        if Processing.get_processing_type() == Globals.mode_enum["predict"]:
            if landuse_flag:
                LandClass.init_annual_prob(total_pixels)

        # Monte Carlo Simulation
        Driver.monte_carlo(z_cumulate, sim_landuse)

        if Processing.get_processing_type() == Globals.mode_enum["predict"]:
            # Output Urban Images
            if IGrid.using_gif:
                filename = f"{output_dir}cumulate_urban.gif"
            else:
                filename = f"{output_dir}cumulate_urban.tif"
                IGrid.echo_meta(f"{output_dir}cumulate_urban.tfw", "urban")
            colortable = Color.get_grayscale_table()

            ImageIO.write_gif(z_cumulate, colortable, filename, "", nrows,
                              ncols)
            Utilities.write_z_prob_grid(z_cumulate.gridData, name)

            if landuse_flag:
                cum_prob, cum_uncert = LandClass.build_prob_image(total_pixels)
                #print(cum_prob)

                # Output Cumulative Prob Image
                if IGrid.using_gif:
                    filename = f"{output_dir}cumcolor_landuse.gif"
                else:
                    filename = f"{output_dir}cumcolor_landuse.tif"
                    IGrid.echo_meta(f"{output_dir}cumcolor_landuse.tfw",
                                    "landuse")
                cum_prob_grid = IGrid.wrap_list(cum_prob)
                ImageIO.write_gif(cum_prob_grid, Color.get_landuse_table(),
                                  filename, "", nrows, ncols)

                # Output Cumulative Uncertainty Image
                if IGrid.using_gif:
                    filename = f"{output_dir}uncertainty.landuse.gif"
                else:
                    filename = f"{output_dir}uncertainty.landuse.tif"
                    IGrid.echo_meta(f"{output_dir}uncertainty.landuse.tfw",
                                    "landuse")
                cum_uncert_grid = IGrid.wrap_list(cum_uncert)
                ImageIO.write_gif(cum_uncert_grid, Color.get_grayscale_table(),
                                  filename, "", nrows, ncols)

        if not landuse_flag or Processing.get_processing_type(
        ) == Globals.mode_enum['predict']:
            fmatch = 0.0
        else:
            landuse1 = IGrid.igrid.get_landuse_igrid(1)
            fmatch = Driver.fmatch(sim_landuse, landuse1, landuse_flag,
                                   total_pixels)

        Stats.analyze(fmatch)
        TimerUtility.stop_timer('drv_driver')
Example #51
0
    def spread(z, avg_slope):
        TimerUtility.start_timer('spr_spread')
        sng = 0
        sdc = 0
        og = 0
        rt = 0

        nrows = IGrid.nrows
        ncols = IGrid.ncols
        total_pixels = nrows * ncols

        road_gravity = Coeff.get_current_road_gravity()
        diffusion = Coeff.get_current_diffusion()
        breed = Coeff.get_current_breed()
        spread = Coeff.get_current_spread()

        excld = IGrid.igrid.get_excld_grid()
        roads = IGrid.igrid.get_road_grid_by_year(
            Processing.get_current_year())
        slope = IGrid.igrid.get_slope_grid()

        nrows = IGrid.nrows
        ncols = IGrid.ncols

        # Zero the growth array for this time period
        delta = [0] * (nrows * ncols)

        # Get slope rates
        slope_weights = Spread.get_slope_weights()

        # Phase 1N3 - Spontaneous Neighborhood Growth and Spreading
        sng, sdc = Spread.phase1n3(diffusion, breed, z.gridData, delta, slope,
                                   excld, slope_weights, sng, sdc)

        # Phase 4 - Organic Growth
        og = Spread.phase4(spread, z.gridData, excld, delta, slope,
                           slope_weights, og)

        # Phase 5 - Road Influence Growth
        rt = Spread.phase5(road_gravity, diffusion, breed, z.gridData, delta,
                           slope, excld, roads, slope_weights, rt)

        Utilities.condition_gt_gif(delta, UGMDefines.PHASE5G, delta, 0)
        Utilities.condition_ge_gif(excld, 100, delta, 0)

        # Now place growth array into current array
        num_growth_pix = 0
        avg_slope = 0.0

        for i in range(total_pixels):
            if z.gridData[i] == 0 and delta[i] > 0:
                # New growth being placed into array
                avg_slope += slope[i]
                z.gridData[i] = delta[i]
                num_growth_pix += 1
        pop = 0
        for pixels in z.gridData:
            if pixels >= UGMDefines.PHASE0G:
                pop += 1

        if num_growth_pix == 0:
            avg_slope = 0.0
        else:
            avg_slope /= num_growth_pix

        TimerUtility.stop_timer('spr_spread')
        return avg_slope, num_growth_pix, sng, sdc, og, rt, pop
Example #52
0
class ShapeEvaluator():
	def __init__(self):

		self.input_description_path=None
		self.input_description=None


		self.model_info_path=None
		self.model_info=None

		self.dataset_info_path=None
		self.dataset_info=None

		self.tfrecord_info_path=None
		self.tfrecord_info=None


		self.output_dir=None

		self.util=Utilities()

	def setInputDescription(self,path):
		self.input_description_path=path
		self.input_description=self.util.readDictCSVFile(path)


	def setModelInformation(self,path):
		self.model_info_path=path
		self.model_info=self.util.readJSONFile(path)

		self.dataset_info_path=self.model_info['dataset_info_path']
		self.dataset_info=self.util.readJSONFile(self.dataset_info_path)

		self.tfrecord_info_path=self.dataset_info['tfrecord_info']
		self.tfrecord_info=self.util.readJSONFile(self.tfrecord_info_path)

	def setOutputDirectory(self,path):
		self.output_dir=path

	def evaluate(self):
		print('Starting evaluation')

		graph = tf.Graph()

		with graph.as_default():

			if self.model_info['model_type']=='classification':

				from classification_nn import ClassificationNN
				nn=ClassificationNN()
				nn.setTFRecordInfo(tfrecord_info=self.tfrecord_info)


				with tf.variable_scope("evaluation_data"):
					if 'VTK Files' in self.input_description:
						data_extractor=ShapeDataExtractor()
						data_extractor.setCSVDescription(self.input_description_path)
						if self.tfrecord_info['extraction_info']['points_feature']:
							data_extractor.setPointFeature(self.tfrecord_info['extraction_info']['points_feature']['feature_names'])
						if self.tfrecord_info['extraction_info']['cells_feature']:
							data_extractor.setCellFeature(self.tfrecord_info['extraction_info']['cells_feature']['feature_names'])

						data_extractor.setOutputDirectory(os.path.join(self.output_dir,'tfrecords'))

						tfrecord_info_path=data_extractor.extractAndSave()
						nn.setTFRecordInfo(tfrecord_info_path=tfrecord_info_path)

						

					dataset=nn.extractSet(self.input_description['TFRecords'],
											batch_size=len(self.input_description['TFRecords']), 
											num_epochs=None, 
											shuffle_buffer_size=None,
											variable_scope='evaluation_set')
					ite = dataset.make_initializable_iterator()
					data_tuple=ite.get_next()


				nn.setTFRecordInfo(tfrecord_info=self.tfrecord_info)
				ops=nn.getOps(data_tuple=data_tuple,
							# images=None, 
							is_training=False,
							#learning_rate=self.learning_rate,
							# decay_steps=10000, 
							# decay_rate=0.96, 
							# staircase=False,
							ps_device="/cpu:0",
							w_device="/cpu:0")

				with tf.Session() as sess:
					#Global Variables Initialisation
					sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])

					#Initializing The Iterators
					sess.run([ite.initializer])

					#Initializing the model saver
					saver = tf.train.Saver()
					saver.restore(sess,self.model_info['model_path'])

					#eval
					feed_dict=nn.getEvaluationParameters()
					predictions = sess.run(ops['class_prediction'],feed_dict=feed_dict)


		#convert digit into original class name

		for i in range(len(predictions)):
			predictions[i]=self.tfrecord_info['class_corres_digit_to_name'][str(predictions[i])]

		new_description=self.input_description
		new_description['Predictions']=predictions

		prediction_path=os.path.join(self.output_dir,'prediction_description.csv')
		self.util.writeDictCSVFile(new_description,prediction_path)

		print('Prediction description saved: %s'%(prediction_path))

		return prediction_path
Example #53
0
def createConfigNode(node_ip, backup_node_ip, executableList=[]):

    ret_status = [
        True, "Step 1(of 5) - Getting config.xml",
        "Step 2(of 5) - Checking Config File",
        "Step(3 of 5) - Creating Node Folders",
        "Step 4(of 5) - Writing to Node Config Files",
        "Step 5(of 5) - Writing to main Config File"
    ]
    print("\n Running createConfigNode() \n")

    try:
        values = NodeStatus.getNodeList()
        tree = ElementTree.parse(".." + os.sep + "config.xml")
        ret_status[1] = ret_status[1] + " Done"

        root = tree.getroot()
        if node_ip in values:
            ret_status[0] = False
            ret_status[2] = ret_status[
                2] + 'ERROR CODE 1003: Node with Ip Address :' + node_ip + " already present in config.xml"
            return ret_status
        else:
            ret_status[2] = ret_status[2] + " : Done"
            node_element = ElementTree.Element('node')

            node_ip_element = ElementTree.Element('nip')
            node_ip_element.text = node_ip
            node_bip_element = ElementTree.Element('nbip')
            node_bip_element.text = backup_node_ip
            node_executable_element = ElementTree.Element('executables')

            executable_files = []
            for files in executableList[:]:
                node_file_element = ElementTree.Element('file')
                node_file_element.text = files
                node_executable_element.append(node_file_element)

            node_element.append(node_ip_element)
            node_element.append(node_bip_element)
            node_element.append(node_executable_element)

            root.append(node_element)
            tree = ElementTree.ElementTree(root)

            ########## Creating Folders for the nodes  ##############################
            utilities.checkExistOrCreate(".." + os.sep + "files")
            path = ".." + os.sep + "files" + os.sep + node_ip
            pathbkup = ".." + os.sep + "files" + os.sep + backup_node_ip
            utilities.createOrReplace(path)
            utilities.createOrReplace(pathbkup)
            ret_status[3] = ret_status[3] + " : Done"

            ########## Writing to all the config files  ##############################
            child_root = ElementTree.Element("client")
            child_root.append(node_element)

            tree_child = ElementTree.ElementTree(child_root)
            tree_child.write(path + os.sep + "configClient.xml")
            tree_child.write(pathbkup + os.sep + "configClient.xml")
            ret_status[4] = ret_status[4] + " : Done"

            tree.write(".." + os.sep + "config.xml")
            ret_status[5] = ret_status[5] + " : Done"

    except:
        ret_status[0] = False
        ret_status.append("\n**** Exception Occurred: " +
                          str(sys.exc_info()[1]) + traceback.format_exc())

    print("\n Done \n")
    return ret_status
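
# --- Illustrative sketch (not part of the example above) ---
# Minimal, standalone demo of the ElementTree construction pattern used
# above: build a <node> element with children and serialize it (here to a
# string instead of config.xml, so nothing is written to disk). The IPs and
# file names are made up for illustration.
from xml.etree import ElementTree

node_element = ElementTree.Element('node')
node_ip_element = ElementTree.Element('nip')
node_ip_element.text = '10.0.0.5'
node_bip_element = ElementTree.Element('nbip')
node_bip_element.text = '10.0.0.6'
node_executable_element = ElementTree.Element('executables')
for files in ['job_a.py', 'job_b.py']:
    node_file_element = ElementTree.Element('file')
    node_file_element.text = files
    node_executable_element.append(node_file_element)

node_element.extend([node_ip_element, node_bip_element, node_executable_element])
print(ElementTree.tostring(node_element, encoding='unicode'))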
Example #54
0
def main():

    print()
    conn = SQLConnector()
    data = conn.pull_all_attacks(num=10000)
    dataframe = pd.DataFrame.from_records(
        data=data, columns=conn.pull_kdd99_columns(allQ=True))
    d = defaultdict(LabelEncoder)
    features = dataframe.iloc[:, :41]
    attack_labels = dataframe.iloc[:, 41:]

    for i in range(0, attack_labels.size):
        attack_labels.at[i, 'attack_type'] = util.attacks_to_num(
            attack_labels.at[i, 'attack_type'])

    fit = features.apply(lambda x: d[x.name].fit_transform(x))

    unbalanced_df = fit.join(attack_labels)
    balanced_df = unbalanced_df.copy(deep=True)

    gen_data = np.asarray(conn.read_gen_attacks_acc_thresh(.90, 1000))
    gen_df = pd.DataFrame.from_records(
        gen_data, columns=conn.pull_kdd99_columns(allQ=True))
    gen_df = gen_df.fillna(0)
    balanced_df = pd.concat([balanced_df, gen_df])
    print(len(balanced_df))

    unbalanced_array = unbalanced_df.values
    balanced_array = balanced_df.values

    # BEGIN LOOP
    # Create two identical multi-class classifiers, make sure their output dimensions match the number of classes in our data

    layers = [16, 32, 16]
    alpha = 0.1
    dropout = 0.3

    unb_labels = unbalanced_array[:, 41]
    [unb_classes, unb_counts] = np.unique(unb_labels, return_counts=True)
    print("Unique classes in unbalanced labels: ")
    print(unb_classes)
    print("Counts for the classes in unbalanced labels: ")
    print(unb_counts)
    unb_class_count = len(unb_classes)
    print("Number of classes in unbalanced dataset: " + str(unb_class_count))

    bal_labels = balanced_array[:, 41]
    [bal_classes, bal_counts] = np.unique(bal_labels, return_counts=True)

    dummy_bal_labels = np_utils.to_categorical(bal_labels)
    bal_class_count = len(bal_classes)
    print("Number of classes in balanced dataset: " + str(bal_class_count))

    print("Unique classes in balanced labels: ")
    print(bal_classes)
    print("Counts for the classes in balanced labels: ")
    print(bal_counts)

    for j in range(100):
        unbalanced_classifier = build_discriminator(layers, alpha, dropout,
                                                    unb_class_count)
        balanced_classifier = build_discriminator(layers, alpha, dropout,
                                                  bal_class_count)

        optimizer = Adam(.001)
        unbalanced_classifier.compile(loss='sparse_categorical_crossentropy',
                                      optimizer=optimizer,
                                      metrics=['accuracy'])
        balanced_classifier.compile(loss='sparse_categorical_crossentropy',
                                    optimizer=optimizer,
                                    metrics=['accuracy'])

        # encoding labels, classifier wants them in range 0 to num_classes
        unb_enc = LabelEncoder()
        bal_enc = LabelEncoder()

        unb_labels = unbalanced_array[:, 41]
        bal_labels = balanced_array[:, 41]

        unb_enc = unb_enc.fit(unb_labels)
        bal_enc = bal_enc.fit(bal_labels)

        unbalanced_array[:, 41] = unb_enc.transform(unbalanced_array[:, 41])
        balanced_array[:, 41] = bal_enc.transform(balanced_array[:, 41])
        [unb_classes, _] = np.unique(unbalanced_array[:, 41],
                                     return_counts=True)
        train_data = unbalanced_array[:, :41].astype(int)
        unb_cm = train(unbalanced_classifier, unbalanced_array, train_data)
        bal_cm = train(balanced_classifier, balanced_array, train_data)

        print("Metrics for iteration " + str(j))
        # print("Confusion matrix of unbalanced: ")
        # print
        print("Accuracy of unbalanced: " + str(getmetrics(unb_cm)))

        # print("Confusion matrix of balanced: ")
        # print(bal_cm)
        print("Accuracy of balanced" + str(getmetrics(bal_cm)))

        print("Diff: " + str(getmetrics(bal_cm) - getmetrics(unb_cm)))
Example #55
0
    def find_best_params(img,
                         dimg,
                         rx,
                         ry,
                         block_size,
                         step_size,
                         spatial_factor,
                         intensity_shrinkage,
                         max_x_offset,
                         max_y_offset,
                         err_func=ImageUtils.rmse,
                         verbosity=0):
        if verbosity > 0:
            print("<{0}>".format(ry), end='')
        if verbosity > 1:
            argdict = locals().copy()
            for k in argdict.keys():
                val = argdict[k]
                if not Utilities.is_iterable(val):
                    print("  {0}: {1}".format(k, argdict[k]))
                else:
                    print("  {0} is iterable".format(k))
        if verbosity > 1:
            print("rx= {0}, ry= {1}".format(rx, ry))
        if rx > 0:
            pass
        left = max(int(rx / 2) - max_x_offset, 0)
        right = min(int(rx / 2) + max_x_offset, dimg.shape[1] - block_size)

        up = max(int(ry / 2) - max_y_offset, 0)
        down = min(int(ry / 2) + max_y_offset, dimg.shape[0] - block_size)
        if (left >= right) or (up >= down):
            import pdb
            pdb.set_trace()
        if verbosity > 0:
            pass

        rblock = img[ry:ry + block_size, rx:rx + block_size]
        rmean = np.mean(rblock)

        best_err = np.finfo('float').max
        tries = 0
        best_x = rx
        best_y = ry
        best_mean_add = 0
        for dx in range(left, right, step_size):
            for dy in range(up, down, step_size):
                temp = dimg[dy:dy + block_size,
                            dx:dx + block_size] * intensity_shrinkage
                if (temp.shape[0] != rblock.shape[0]) or (temp.shape[1] !=
                                                          rblock.shape[1]):
                    msg = Utilities.last_exception_info()
                    warnings.warn(msg)

                dmean = np.mean(temp)
                mean_add = rmean - dmean
                dblock = temp + mean_add
                dblock = np.clip(dblock, 0, 255)
                newmean = np.mean(dblock)
                if (dblock.shape[0] != rblock.shape[0]) or (dblock.shape[1] !=
                                                            rblock.shape[1]):
                    msg = "range and domain have different shapes"
                    msg += "rx={0}, ry={1}".format(rx, ry)
                    raise RuntimeError(msg)
                err = np.finfo('float').max
                try:
                    if (dblock.shape[0] != rblock.shape[0]) or (
                            dblock.shape[1] != rblock.shape[1]):
                        msg = Utilities.last_exception_info()
                        warnings.warn(msg)
                    err = err_func(rblock, dblock)
                except Exception as e:
                    emsg = Utilities.last_exception_info()
                    print(emsg)
                    raise RuntimeError(emsg)
                tries += 1
                if err < best_err:
                    best_x = dx
                    best_y = dy
                    best_mean_add = mean_add
                    best_err = err
        if tries == 0:
            msg = "tries==0, rx={0}, ry= {1}".format(rx, ry)
            raise RuntimeError(msg)
        if best_x > dimg.shape[1] - block_size or best_y > dimg.shape[
                0] - block_size:
            msg = "codes out of range, x= {0}, y={1}".format(best_x, best_y)
            msg += "image wd= {0}, ht= {1}".format(dimg.shape[1],
                                                   dimg.shape[0])
            print(msg)
            raise RuntimeError(msg)
        return (best_x, best_y, best_mean_add, rx, ry, best_err, tries)
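
# --- Illustrative sketch (not part of the example above) ---
# Tiny standalone version of the brute-force search in find_best_params():
# slide a window over a search area, score each candidate block with RMSE,
# and keep the offset with the lowest error. The image is random data made
# up for illustration.
import numpy as np

def rmse(a, b):
    return float(np.sqrt(np.mean((a.astype(float) - b.astype(float)) ** 2)))

rng = np.random.RandomState(1)
img = rng.randint(0, 256, size=(16, 16))
block_size, step_size = 4, 2
rblock = img[6:6 + block_size, 6:6 + block_size]   # range block to match

best_err, best_xy = np.finfo('float').max, None
for dx in range(0, img.shape[1] - block_size, step_size):
    for dy in range(0, img.shape[0] - block_size, step_size):
        dblock = img[dy:dy + block_size, dx:dx + block_size]
        err = rmse(rblock, dblock)
        if err < best_err:
            best_err, best_xy = err, (dx, dy)
print(best_xy, best_err)   # expect (6, 6) 0.0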
Example #56
0
                        help='Save image to file')
    parser.add_argument('-o',
                        action='store_true',
                        default=False,
                        dest='o',
                        help='Save data to CSV')
    arguments = parser.parse_args()

    if arguments.im1:
        img1Path = str(arguments.im1)[2:-2]
    else:
        parser.print_help()
        print("-img1: Query Image")
        sys.exit(1)

    util = Utilities()

    #image counter
    n = 0
    ## Prepare Dataset ##
    dataset = []
    listImages = glob.glob('dataset/*.jpg')
    for i in listImages:
        dataset.append(i.split('/')[-1])

    #creating a list of (<image>,#inliers) pairs
    resList = np.zeros(len(dataset), [('idx', 'int16'), ('imageId', 'a28'),
                                      ('inliers', 'int16'),
                                      ('percent', 'float')])

    print("\n================")
Example #57
0
    step_size = 2
    verbosity = 1
    intensity_shrinkage = 0.7

    oimg = ImageUtils.trim_image(small_img,
                                 spatial_factor=spatial_factor,
                                 block_size=block_size,
                                 verbosity=verbosity)

    # show image
    plt.imshow(oimg, cmap=plt.get_cmap('gray'))  #, vmin=0, vmax=1)
    plt.show()

    # compress
    Comp = Compressor()
    start = Utilities.now()
    print(start)
    params = Comp.compress_image(oimg,
                                 block_size=block_size,
                                 spatial_factor=spatial_factor,
                                 intensity_shrinkage=intensity_shrinkage,
                                 err_func=ImageUtils.rmse,
                                 max_x_offset=None,
                                 max_y_offset=None,
                                 verbosity=1)
    end = Utilities.now()
    print(end)
    cdf = pd.DataFrame(params['codes'])
    print("rmse= {0}".format(np.sqrt(cdf['err'].mean())))
    # decompress
    Decomp = Decompressor()
Example #58
0
from groupmembers import print_group_members

def main():
	print_group_members()

	# initialize the utilities class. downloads mnist data and initializes input variable x,
	# predicted output label variable y_
	num_iterations = 20000
	batch_size = 50
	learning_rate = 0.5
	utility_obj = Utilities(num_iterations, batch_size, learning_rate)

	# Read the USPS data from proj3_images folder and store for further use
	utility_obj.get_usps_data()

	# create the logistic regression model, train using mnist data and test it using mnist and usps data set
	logistic_regression(utility_obj)

	# create single layer neural network model, train using mnist and test it using mnist and usps
	num_neurons = 100
	# single_layer_nn(utility_obj,num_neurons)

	# create convolutional neural network model, train using mnist and test it using mnist and usps
	# train_cnn(utility_obj)
Example #59
0
class BackupRemover(object):
    '''
    Utility to remove all but a given number of
    table backups from the aux directory.
    '''

    default_num_backups_to_keep = 2

    #------------------------------------
    # Constructor
    #-------------------

    def __init__(self,
                 num_to_keep=None,
                 user=None,
                 db_pwd=None,
                 target_db=None,
                 host=None,
                 tables=[],
                 logging_level=logging.INFO,
                 unittests=False):
        '''
        Most parameters have defaults that can be
        set once and for all. The main params that
        might change are num_to_keep, and tables.
        
        The num_to_keep integer value declares how many
        of the newest backup tables to keep for each table.
        
        The tables list of table names may contain a mix
        of table root names (e.g. AssignmentSubmission, Terms)
        and backup table names (e.g. Terms_2019_01_10_14_14_40_123456).
        For root names, all backup tables are collected and the
        num_to_keep newest are retained. For backup table names,
        only those specific tables are removed.
        
        @param num_to_keep: how many of the latest backup tables
            to retain for each aux table
        @type num_to_keep: int
        @param user: MySQL user for login
        @type user: str
        @param db_pwd: password for logging into MySQL. Don't use
            for security reasons. Instead, put the pwd into
            $HOME/.ssh/canvas_pwd
        @type db_pwd: str
        @param target_db: MySQL where aux tables reside. 
        @type target_db: str
        @param host: MySQL host name
        @type host: str
        @param tables: list of specific tables to consider. If None,
            backups for all aux tables are trimmed. 
        @type tables: [str]
        @param logging_level: how much information to provide during runtime
        @type logging_level: logging.loglevel
        @param unittests: whether this instantiation is from a unittest
        @type unittests: boolean
        '''

        # Get local configuration info:
        self.config_info = ConfigInfo()

        # Access to common functionality:
        self.utils = Utilities()

        if user is None:
            user = self.config_info.default_user

        if host is None:
            host = self.config_info.default_host

        if db_pwd is None:
            db_pwd = self.utils.get_db_pwd(host, unittests=unittests)
        elif db_pwd == True:
            db_pwd = self.utils.get_db_pwd(host,
                                           ask_user=True,
                                           unittests=unittests)

        if target_db is None:
            target_db = self.config_info.canvas_db_aux

        self.target_db = target_db

        if num_to_keep is None:
            self.num_to_keep = BackupRemover.default_num_backups_to_keep
        else:
            self.num_to_keep = num_to_keep

        # Better name for tables to consider removing:
        tables_to_consider = tables

        # Unittests expect a db name in self.db:
        self.db = target_db

        self.db_obj = self.utils.log_into_mysql(user,
                                                db_pwd,
                                                db=target_db,
                                                host=host)

        self.utils.setup_logging(logging_level)
        if unittests:
            self.db_name = target_db
            return

        # Get names of all tables in the target_db
        all_tables = self.utils.get_existing_tables_in_dir(self.db_obj,
                                                           return_all=True,
                                                           target_db=target_db)

        # If caller specified only specific tables/backup tables to
        # remove, weed out all table names not in caller's list:
        all_tables_to_consider = self.find_tables_to_consider(
            all_tables, tables_to_consider)

        self.remove_old_backups(all_tables_to_consider)
        self.close()

    #-------------------------
    # find_tables_to_consider
    #--------------

    def find_tables_to_consider(self, table_nm_list, specific_tables):
        '''
        Given mixed list of root and backup table names,
        and a list of specific table names, return a new
        list of effectively wanted tables.
        
        If a table root name occurs in specific_tables, then 
        all of that root name's backup versions in table_nm_list
        are retained. For backup table names in specific_tables,
        only those backup names are retained, not the others of
        the same root.
        
        If specific_tables is empty or None, table_nm_list is returned.   
        
        @param table_nm_list: list of all aux tables and backups
        @type table_nm_list: [str]
        @param specific_tables: possibly empty list of specific tables
            to remove. If empty list or none: return the full table_nm_list
        @type specific_tables: [str]
        '''

        if specific_tables is None:
            return table_nm_list
        if len(specific_tables) == 0:
            return table_nm_list

        # Collect all root names in specific_tables:
        roots = [
            tbl_name for tbl_name in specific_tables
            if self.utils.is_aux_table(tbl_name)
        ]

        # Remember root table names, so we can
        # keep all their backup names in the returned list:
        new_all = table_nm_list.copy()
        for tbl_nm in table_nm_list:
            # Is it a backup name whose root table name
            # is in the list to consider?
            if self.utils.is_backup_name(tbl_nm) and \
                self.utils.get_root_name(tbl_nm) in roots:
                # Keep the backup name:
                continue
            # At this pt the table must explicitly be
            # in the keep list to survive:
            if tbl_nm not in specific_tables:
                new_all.remove(tbl_nm)

        return new_all

    #-------------------------
    # remove_old_backups
    #--------------

    def remove_old_backups(self, all_table_names):
        '''
        Given a list of aux table names, find the backup 
        tables among them. Then delete all but the newest
        self.num_to_keep backup tables from the database. 
        
        @param all_table_names: list of table names to consider removing.
        @type all_table_names: [str]
        '''

        # Map table name --> list of its backups
        backup_tables = {}

        for tbl_nm in all_table_names:
            if self.utils.is_aux_table(tbl_nm):
                # Found root of an official table name.
                # (as opposed to a backup table)
                if tbl_nm not in backup_tables:
                    # Initialize an entry for this tbl:
                    backup_tables[tbl_nm] = []
                continue
            # Is it a backup table name?
            if self.utils.is_backup_name(tbl_nm):
                # Get root of the backup table name:
                root_nm = self.utils.get_root_name(tbl_nm)
                # Add to dict:
                try:
                    backup_tables[root_nm].append(tbl_nm)
                except KeyError:
                    # Hadn't seen this root table yet:
                    backup_tables[root_nm] = [tbl_nm]

        # Go through dict; for each table, sort its existing backup tables
        # by their dates, which are part of the names:

        # We'll modify backup_tables in the following
        # loop, so use a copy:
        backup_tables_copy = backup_tables.copy()

        for (tbl_nm, backup_names_list) in backup_tables_copy.items():
            # Sort the backup tbl names by their date, newest first:
            sorted_backups = sorted(backup_names_list,
                                    key=lambda name: self.get_date(name),
                                    reverse=True)
            backup_tables[tbl_nm] = sorted_backups

        # Go through, and remove all but the first num_to_keep
        # backup tables in each list:
        for backup_nm_list in backup_tables.values():
            # Chop off all names after the first num_to_keep names,
            # unless the name list is shorter than num_to_keep,
            # in which case we keep what we have:
            if len(backup_nm_list) <= self.num_to_keep:
                continue
            for to_delete in backup_nm_list[self.num_to_keep:]:
                self.db_obj.dropTable(to_delete)
                self.utils.log_info(f"Removing old backup table {to_delete}")

        self.utils.log_info(
            f"In {self.target_db}: no more than {self.num_to_keep} backup tables left per table."
        )

    #-------------------------
    # get_date
    #--------------

    def get_date(self, backup_tbl_nm):
        (_root, date_str,
         _dateobj) = self.utils.backup_table_name_components(backup_tbl_nm)
        return date_str

    #-------------------------
    # close
    #--------------

    def close(self):
        try:
            self.db_obj.close()
        except Exception:
            pass
    # Used for plotting.
    CATEGORIES_50_NICO = [
        'book', 'book', 'book', 'book', 'book', 'book', 'book', 'book', 'book',
        'book', 'hairbrush', 'hairbrush', 'hairbrush', 'hairbrush',
        'hairbrush', 'hairbrush', 'hairbrush', 'hairbrush', 'hairbrush',
        'hairbrush', 'hair clip', 'hair clip', 'hair clip', 'hair clip',
        'hair clip', 'hair clip', 'hair clip', 'hair clip', 'hair clip',
        'hair clip', 'flower', 'flower', 'flower', 'flower', 'flower',
        'flower', 'flower', 'flower', 'flower', 'flower', 'glass', 'glass',
        'glass', 'glass', 'glass', 'glass', 'glass', 'glass', 'glass', 'glass'
    ]

    # ------------------------------------ Initialization --------------------------------------------------------------

    rgwr = GammaGWR()
    utils = Utilities()
    learning = Learning()
    args = utils.parse_arguments()

    # Get data.
    original_data = utils.load_data(args.dataset).values
    original_data_normalized = utils.normalize_data(original_data,
                                                    DATA_DIMENSION)
    # original_data_normalized = original_data

    # Get training data.
    train_data = original_data_normalized[np.in1d(
        original_data_normalized[:, SESSION_COLUMN], TRAIN_SESSIONS)]
    train_data = train_data[np.in1d(train_data[:, INSTANCE_COLUMN],
                                    TRAIN_INSTANCES)]
    train_data = utils.reduce_number_of_frames(train_data, FACTOR_FRAMES)