Example #1
def resetPass(customCommand,test=False):
	import sys
	from random import sample as randomize
	from random import random
	from os.path import exists
	# Opens the Adj, Adv, and Noun files as arrays
	av = open(sys.path[0]+"/Adv").read().splitlines()
	aj = open(sys.path[0]+"/Adj").read().splitlines()
	nn = open(sys.path[0]+"/Noun").read().splitlines()
	# Just for fun, some statistics!
	totalCombos = len(av)*len(aj)*len(nn)
	combosFormatted = "{:,}".format(totalCombos)
	avLengths=[]
	for item in av:
		avLengths.append(len(item))
	ajLengths=[]
	for item in aj:
		ajLengths.append(len(item))
	nnLengths=[]
	for item in nn:
		nnLengths.append(len(item))
	from statistics import mean,median,mode
	print("-"*25+"\n"+
		  "Total adverbs: "+str(len(av))+"\n"+
		  "Total adjectives: "+str(len(aj))+"\n"+
		  "Total nouns: "+str(len(nn))+"\n"+
		  "Total possible combinations: "+combosFormatted+" (not factoring in numbers)\n"+
		  "Shortest possible passphrase length: "+str(min(avLengths)+min(ajLengths)+min(nnLengths))+"\n"+
		  "Longest possible passphrase length: "+str(max(avLengths)+max(ajLengths)+max(nnLengths)+5)+"\n"+
		  "Mean passphrase length: "+str(int(mean(avLengths)+mean(ajLengths)+mean(nnLengths)+4))+"\n"+
		  "Median passphrase length: "+str(int(median(avLengths)+median(ajLengths)+median(nnLengths))+4)+"\n"+
		  "Mode passphrase length: "+str(int(mode(avLengths)+mode(ajLengths)+mode(nnLengths))+4)+"\n"+
		  "-"*25)
	# Randomize the order of the arrays
	av = randomize(av,len(av))
	aj = randomize(aj,len(aj))
	nn = randomize(nn,len(nn))
	# Pick a random word from each randomized array
	newAdverb = av[int(random()*len(av))].capitalize()
	newAdjective = aj[int(random()*len(aj))].capitalize()
	newNoun = nn[int(random()*len(nn))].capitalize()
	# Possibly add a random number from 1 to 10,000
	if maybeNumber():
		from math import ceil
		number = str(ceil(random()*10000))
	else:
		number = ''
	# Assemble the passphrase
	newPassphrase = number+newAdverb+newAdjective+newNoun
	#################################################################### Needs attention
	print("The new passphrase will be: "+newPassphrase)
	print("Total entropy: ~"+str(int(entropy(newPassphrase))))
	if customCommand == ' {PASSPHRASE}':
		print("Password display command not found. Aborting.")
		exit()
	if not test:
		import RouterPasswording
		RouterPasswording.newPassphrase(newPassphrase)
	from os import system as execute
	execute(customCommand.replace("{password}",newPassphrase).replace("{passphrase}",newPassphrase))
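The entropy() helper called above (like maybeNumber() and RouterPasswording) is defined elsewhere in the original project; a minimal sketch of one plausible implementation, assuming it estimates bits from the size of the character pool actually present in the passphrase (hypothetical, not the original helper):

from math import log2

def entropy(passphrase):
    # Hypothetical sketch: bits = length * log2(size of the character pool used).
    pool = 0
    if any(c.islower() for c in passphrase):
        pool += 26
    if any(c.isupper() for c in passphrase):
        pool += 26
    if any(c.isdigit() for c in passphrase):
        pool += 10
    return len(passphrase) * log2(pool)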
Example #2
def find_hit_regions(primer, alignment): #this one is for all the sequences in the alignment
    '''This is currently super inefficient: it essentially repeats the work of primer_coverage() for every possible
    frame in a sliding window, for every sequence. If I'm OK with that, this function should just return the
    number of mismatches for the best-matching positions. If I do that, I could also return the amplicon length...
    very tempting, I think I should do this. What else besides amplicon length would that allow? It could also
    output potential mispriming sites and the amplicon length for the misprimed sites, print a warning when
    mispriming is likely, and output a spreadsheet listing which sequences are likely to misprime and how big
    the amplicon for the mispriming would be. But that mispriming would only cover the particular sequences you
    are trying to amplify; a much more likely source of mispriming is other random genomic DNA. A metagenome
    might be a good thing to run this on, but that would really take a long time.'''

    alignment_len = len(alignment[0])
    primer_length = len(primer)
    number_of_frames = (alignment_len - primer_length) + 1
    range_of_frames = range(0, number_of_frames)
    list_of_indexes = []
    first_indexes = []
    last_indexes = []
    frame_indexes = {}
    for frame in range_of_frames:
        frame_indexes[frame] = {}
        frame_indexes[frame]["first"] = frame
        frame_indexes[frame]["last"] = frame + primer_length

    hit_regions = {}
    for seq in alignment:
        sequences = {}
        for frame in frame_indexes:
            sequence = seq[frame_indexes[frame]["first"]:frame_indexes[frame]["last"]]
            #print(sequence)
            sequences[frame] = sequence

        number_mismatches = {}
        for key in sequences:
            number_mismatches[key] = 0
            for count, position in enumerate(sequences[key].upper()):
                #print(count, position)
                if position not in ambiguous_dna_values[primer[count]]:
                    number_mismatches[key] += 1
        indexes = frame_indexes[min(number_mismatches, key=number_mismatches.get)]
        hit_regions[seq.id] = indexes
        #print("number of sequences checked: {}".format(len(hit_regions)))
        #print("Percent complete: {}".format(len(hit_regions)/len(alignment)))
    #hit_regions = set(hit_regions)
    #print(hit_regions)

    starting = []
    ending = []
    for key in hit_regions:
        #print(key)
        starting.append(hit_regions[key]["first"])
        ending.append(hit_regions[key]["last"])
    #print(starting)
    #print(ending)
    starting = mode(starting)
    ending = mode(ending)
    return starting, ending
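find_hit_regions relies on two names defined outside this snippet; presumably imports along these lines, assuming Biopython's IUPAC tables and statistics.mode (an assumption, not shown in the original code):

# Assumed context for find_hit_regions, not part of the original snippet.
from statistics import mode
from Bio.Data.IUPACData import ambiguous_dna_values  # maps e.g. 'R' -> 'AG', 'N' -> 'GATC'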
 def classify(self, text):
     features = self.find_features(text)
     votes = []
     for c in self._classifiers:
         v = c.classify(features)
         votes.append(v)
     choice_votes = votes.count(mode(votes))
     conf = choice_votes / float(len(votes))
     return (mode(votes), conf)
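Methods like classify() and confidence() in these examples typically live on a small voting-ensemble wrapper; a minimal self-contained sketch of such a class (an assumption about the surrounding code, not the original class):

from statistics import mode

class VoteClassifier:
    def __init__(self, *classifiers):
        self._classifiers = classifiers

    def classify(self, features):
        # Each wrapped classifier votes; the most common label wins.
        votes = [c.classify(features) for c in self._classifiers]
        return mode(votes)

    def confidence(self, features):
        # Fraction of classifiers that agreed with the winning label.
        votes = [c.classify(features) for c in self._classifiers]
        return votes.count(mode(votes)) / len(votes)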
def main():
    print(stats.mean(range(6)))
    print(stats.median(range(6)))
    print(stats.median_low(range(6)))
    print(stats.median_high(range(6)))
    print(stats.median_grouped(range(6)))
    try:
        print(stats.mode(range(6)))
    except Exception as e:
        print(e)
    print(stats.mode(list(range(6)) + [3]))
    print(stats.pstdev(list(range(6)) + [3]))
    print(stats.stdev(list(range(6)) + [3]))
    print(stats.pvariance(list(range(6)) + [3]))
    print(stats.variance(list(range(6)) + [3]))
Example #5
def process_file(filename):
    # data = np.recfromcsv(filename, delimiter=',', filling_values=numpy.nan, case_sensitive=True, deletechars='', replace_space=' ')
    with io.open(filename, "r", encoding="UTF-8") as source_file:
        data_iter = csv.DictReader(source_file)
        # data = [data for data in data_iter]
        pricelist = []
        unitlist = []
        for line in data_iter:
            pricelist.append(float(line["product_price"]))
            unitlist.append(line["OKEI_name"])
        price_med = statistics.median(pricelist)
        unit_mode = statistics.mode(unitlist)
        # df = pd.DataFrame(data)

    med_outliers = []
    mod_outliers = []

    with io.open(filename, "r", encoding="UTF-8") as source_file:
        data_iter = csv.DictReader(source_file)
        for line in data_iter:
            if line["OKEI_name"] != unit_mode:
                mod_outliers.append(line)
            if (float(line["product_price"]) / price_med) > 3:
                med_outliers.append(line)

    return price_med, unit_mode, med_outliers, mod_outliers
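A quick way to exercise process_file, using a made-up CSV with the product_price and OKEI_name columns the function expects (file name and data are illustrative):

import csv
import io

with io.open("sample_prices.csv", "w", encoding="UTF-8", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["product_price", "OKEI_name"])
    writer.writeheader()
    writer.writerows([
        {"product_price": "10.0", "OKEI_name": "piece"},
        {"product_price": "12.0", "OKEI_name": "piece"},
        {"product_price": "50.0", "OKEI_name": "kg"},  # both a price and a unit outlier
    ])

price_med, unit_mode, med_outliers, mod_outliers = process_file("sample_prices.csv")
print(price_med, unit_mode, len(med_outliers), len(mod_outliers))  # 12.0 piece 1 1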
Example #6
def print_posts(posts, post_type, print_num):
    price_list = []

    for post in posts:
        try:
            price_list.append(float(post.price))
        except ValueError:
            pass

    print('%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%{}'
          '%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%'.format(post_type))

    if price_list:
        print('NUM of POSTS: ', len(posts))
        print('MEAN: ', statistics.mean(price_list))
        print('MEDIAN: ', statistics.median(price_list))
        try:
            print('MODE: ', statistics.mode(price_list))
            print('STDEV: ', statistics.stdev(price_list))
        except statistics.StatisticsError:
            pass

    for post in posts[:print_num]:
        pprint(post.price)
        pprint(post.title)
        pprint(post.carrier)
        pprint(post.description)
        pprint('www.kijiji.ca' + post.link)
def print_stats(l):  # noqa: C901
    try:
        print("\tMean: {}".format(mean(l)))
    except StatisticsError as e:
        print("\tMean: {}".format(str(e)))

    try:
        print("\tMedian: {}".format(median(l)))
    except StatisticsError as e:
        print("\tMedian: {}".format(str(e)))

    try:
        print("\tMode: {}".format(mode(l)))
    except StatisticsError as e:
        print("\tMode: {}".format(str(e)))

    try:
        print("\tMax: {}".format(max(l)))
    except StatisticsError as e:
        print("\tMax: {}".format(str(e)))

    try:
        print("\tMin: {}".format(min(l)))
    except StatisticsError as e:
        print("\tMin: {}".format(str(e)))
Example #8
def statistics_for_time_points(time_points: list, header: str) -> str:
    time_in_seconds = [t.total_seconds() for t in time_points]

    mean_time = time.strftime("%H:%M", time.gmtime(st.mean(time_in_seconds)))
    median_time = time.strftime("%H:%M", time.gmtime(st.median(time_in_seconds)))
    std_deviation = time.strftime("%H:%M", time.gmtime(st.pstdev(time_in_seconds)))
    try:
        mode_time = time.strftime("%H:%M", time.gmtime(st.mode(time_in_seconds)))
    except st.StatisticsError:
        mode_time = "-"
    min_time = time.strftime("%H:%M", time.gmtime(min(time_in_seconds)))
    max_time = time.strftime("%H:%M", time.gmtime(max(time_in_seconds)))

    value_width = 5
    key_width = len(header) - value_width

    row_format = "\n{{:<{key_width}}}{{:>{value_width}}}".format(key_width=key_width, value_width=value_width)
    delimiter = "\n" + "-" * len(header)

    stats_string = header
    stats_string += delimiter

    stats_string += row_format.format("Mean:", mean_time)
    stats_string += row_format.format("Median:", median_time)
    stats_string += row_format.format("Standard deviation:", std_deviation)
    stats_string += row_format.format("Mode:", mode_time)
    stats_string += row_format.format("Earliest:", min_time)
    stats_string += row_format.format("Latest:", max_time)
    stats_string += delimiter
    stats_string += "\n{} values".format(len(time_in_seconds))
    return stats_string
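A short usage sketch, assuming the time_points are datetime.timedelta objects (the total_seconds() call implies as much); the sample values are illustrative:

from datetime import timedelta

times = [timedelta(hours=7, minutes=30), timedelta(hours=8), timedelta(hours=7, minutes=30)]
print(statistics_for_time_points(times, header="Wake-up time statistics"))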
Example #9
def mode(RGB_list, count):
    '''Gets the mode of the given channel across a list of RGB values.'''
    
    temp = []
    for index in RGB_list:
        temp.append(index[count])
    return statistics.mode(temp)
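For example, with a list of RGB tuples this returns the most common value of one channel (illustrative data):

pixels = [(255, 0, 0), (0, 255, 0), (255, 0, 0)]
print(mode(pixels, 0))  # 255, the most common red-channel value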
Example #10
 def classify(self, features):
     votes = []
     for c in self._classifiers: #c for classifiers
         v = c.classify(features) #v for votes
         votes.append(v)
     #print(votes)
     return mode(votes)
 def classify(self, features):
     votes = []
     for c in self._classifiers:
         v = c.classify(features)
         votes.append(v)
     result = mode(votes)
     return result.lower()
Example #12
	def get_3p_domain_stats(self, num_pages, tld_filter = None):
		"""
		determines basic stats for the number of 3p domains contacted per-page
		
		note this is distinct domain+pubsuffix, not fqdns (e.g. 'sub.example.com' 
			and 'sub2.example.com' only count as 'example.com')

		if tracker_domains have been set the stats will reflect only third-parties
			which have crossed the threshold (see get_tracker_domains())
		"""

		# each page id corresponds to a list of domains belonging to page elements
		page_id_to_domains_dict = {}

		# run query to get all page id, page domain, and element domain entries
		# there is no third-party filter so each page will have at least one entry for first-party domain
		for row in self.sql_driver.get_page_id_3p_element_domain_pairs(tld_filter):
			page_id 		= row[0]
			element_domain 	= row[1]

			# if the page id is not yet seen enter the current element as a fresh list
			#	otherwise, we add to the existing list
			# in both cases, if there is a tracker_domain list we only add
			#	domains that are in the list
			if page_id not in page_id_to_domains_dict:
				if self.tracker_domains:
					if element_domain in self.tracker_domains:
						page_id_to_domains_dict[page_id] = [element_domain]
				else:
					page_id_to_domains_dict[page_id] = [element_domain]
			else:
				if self.tracker_domains:
					if element_domain in self.tracker_domains:
						page_id_to_domains_dict[page_id] = page_id_to_domains_dict[page_id] + [element_domain]
				else:
					page_id_to_domains_dict[page_id] = page_id_to_domains_dict[page_id] + [element_domain]

		# now we determine the number of domains each page is connected to by looking at len of list of 3p domains
		per_page_3p_element_counts = []
		for page_id in page_id_to_domains_dict:
			per_page_3p_element_counts.append(len(page_id_to_domains_dict[page_id]))

		# pages that have no 3p elements are not yet in our counts
		# 	so for all uncounted pages we add in zeros
		uncounted_pages = num_pages - len(per_page_3p_element_counts)
		while uncounted_pages > 0:
			uncounted_pages -= 1
			per_page_3p_element_counts.append(0)

		# mean and median should always be ok
		mean 	= statistics.mean(per_page_3p_element_counts)
		median 	= statistics.median(per_page_3p_element_counts)

		# but mode can throw an error, so catch here
		try:
			mode = statistics.mode(per_page_3p_element_counts)
		except statistics.StatisticsError:
			mode = None

		return(mean, median, mode)
Example #13
def basic_stats(total_data):
    mean = statistics.mean(total_data)
    median = statistics.median(total_data)
    mode = statistics.mode(total_data)
    standard_dev = statistics.stdev(total_data)

    return [mean, median, mode, standard_dev]
Example #14
def diff1(listy):
    pie=listy
    awe=[]
    d=reduce(gcd,listy)
    for elem in listy:
        awe.append(elem/d)
    listy=awe
    new=[listy]
    old=[pie]
    for elem in listy:
        new.append(diff(new[-1]))
    for elem in listy:
        old.append(diff(old[-1]))
    new=new[0:-1]
    old=old[0:-1]
    loop=-1
    oth=0
    for elem in new:
        loop=loop+1
        if elem.count(elem[0])==len(elem):
            me=loop
            oth=1
    if oth==1:
        old=old[0:me]
        old=list(reversed(old))
        start=new[0][0]
        loop=0
        for elem in old:
            loop=loop+elem[-1]
        return(loop)
    else:
        return(mode(pie))
    def validate_array(self, arr):
        '''
        given arr
        if mean and stdev of *arr* is close to target_mean and target_stdev,
        return true
        '''

        #print('there are {} elements'.format(len(arr)))
        mean = statistics.mean(arr)
        #median = statistics.median(arr)
        stdev = statistics.stdev(arr)
        mode = 0
        # most time we could not get *mode* from this array, pass it
        try:
            mode = statistics.mode(arr)
        except statistics.StatisticsError:
            pass
        #print('median: {:.3f}\n'.format(media))
        #print('mean: {:.3f}\nstdev: {:.3f}\n'.format(mean, stdev))
        if abs(self.target_mean[0] - mean) < self.target_mean[1] \
            and abs(self.target_stdev[0] - stdev) < self.target_stdev[1]:
            self.result_mean = mean
            self.result_stdev = stdev
            self.result_mode = mode
            return True

        return False
 def classify(self, features):
     votes = []
     for c in self._classifiers:
         v = c.classify(features)
         votes.append(v)
         print(v)
     return mode(votes)
Example #17
	def vote(self, training_set):
		votes = []
		for c in self.classifiers:
			v = c.classify(training_set)
			votes.append(v)

		return mode(votes)
Example #18
def linear(y):
    x=list(range(1,len(y)+1))
    xp=6
    yn=diff(y)
    ynn=diff(yn)
    cof=np.polyfit(x,y,1)
    #print(cof)
    
    yon=np.polyval(cof,x)
    
    newlist=0
    newlist2=0
    loop=-1
    for elem in y:
        loop=loop+1
        newlist=newlist+(elem-yon[loop])**2
        newlist2=newlist2+(elem-np.mean(y))**2
    newlist=(1-newlist/newlist2)*100
    predict=np.polyval(cof,xp)
    
    if newlist<99:
        try:
            predict=mode(y)
        except statistics.StatisticsError:
            predict=y[-1]
    yon=list(map(int,list(map(round,yon))))
    #print(yn[-1])
    #plt.plot(yon)
    #plt.plot(y)
    #print(yon,y)
    return(round(float(predict)))
def count_mislabels(labels, true_labels):
    # 2017-08-17: I will make the assumption that clusters have only 2 values.
    # clusters = np.unique(true_labels)
    # mislabels = 0
    # for curr_clust in clusters:
    #     print("for label", curr_clust)
    #     print("\t", labels[(true_labels == curr_clust)])
    #     compare_to = mode(labels[(true_labels == curr_clust)])
    #     print("\tcompare to:", compare_to, "mislables: ", np.count_nonzero(labels[(true_labels == curr_clust)] != compare_to))
    #     mislabels += np.count_nonzero(labels[(true_labels == curr_clust)] != compare_to)

    set_a = labels[true_labels == 0]
    set_b = labels[true_labels == 1]

    if len(set_a) <= len(set_b):
        shorter = set_a
        longer = set_b
    else:
        shorter = set_b
        longer = set_a

    long_mode = mode(longer)  # this what the label of the longer cluster should be.
    short_mode = 1 if long_mode == 0 else 0  # Choose the other value for the label of the shorter cluster

    # start with the longer vector:
    # print("The long set is", longer, "it has", np.count_nonzero(longer != long_mode), 'mislabels.')
    # print("The short set is", shorter, "it has", np.count_nonzero(shorter != short_mode), 'mislabels.')

    # np.count_nonzero(longer != long_mode) + np.count_nonzero(shorter != short_mode)

    return np.count_nonzero(longer != long_mode) + np.count_nonzero(shorter != short_mode)
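A small illustration with made-up cluster labels, assuming mode here is statistics.mode applied to the numpy arrays:

import numpy as np
from statistics import mode

true_labels = np.array([0, 0, 0, 1, 1, 1, 1])
labels = np.array([1, 0, 0, 0, 1, 1, 1])
print(count_mislabels(labels, true_labels))  # 2 -- one mislabel in each cluster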
Example #20
def stats_helper(list):
    """
    https://docs.python.org/3/library/statistics.html#statistics.pvariance
    :param list:
    :return:
    """

    mean = statistics.mean(list)
    mode = None

    try:
        mode = statistics.mode(list)
    except statistics.StatisticsError:
        # no unique mode
        pass

    return {
        'mean': mean,
        'variance': statistics.pvariance(list, mu=mean),
        'standard_deviation': statistics.pstdev(list, mu=mean),
        'median': statistics.median(list),
        'median_low': statistics.median_low(list),
        'median_high': statistics.median_high(list),
        'median_grouped': statistics.median_grouped(list),
        'mode': mode
    }
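A quick illustration with made-up numbers:

result = stats_helper([1, 2, 2, 3, 4])
print(result['mean'], result['median'], result['mode'])  # 2.4 2 2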
Example #21
def run(data):
    f = open("analyzer.log", 'a+')
    c = costs(data)
    total = total_cost(data)
    f.write("\n############# COST #############\n")
    f.write("Total Cost : {0}\n".format(total))
    f.write("Total Cost Mean: {0}\n".format(mean(c)))
    f.write("Total Cost Median: {0}\n".format(median(c)))
    f.write("Total Cost Mode: {0}\n".format(mode(c)))
    f.write("Total Cost Variance: {0}\n".format(variance(c)))

    cost_action = action(data)
    f.write("Cost by Action: \n")
    for k, v in cost_action.items():
        f.write("\t{0} -> {1} units\n".format(k, v))

    f.write("Percentage Cost by Action: \n")
    for k, v in cost_action.items():
        f.write("\t{0} -> {1} %\n".format(k, round(((v * 100.) / total), 2)))

    f.write("Cost Variance by Action: \n")
    for k, v in cost_action.items():
        c_action = costs_action(data, k)
        if len(c_action) > 1:
            f.write("\t{0} -> {1} units\n".format(k, round(variance(c_action), 2)))
        else:
            f.write("\t{0} -> {1} units\n".format(k, round(c_action[0], 2)))

    key_max, max_value = max_action_value(cost_action)
    f.write("More Expensive Action by value: {0} -> {1}\n".format(key_max[0], cost_action.get(key_max[0])))

    key_max, max_value = max_action_percentage(cost_action, total)
    f.write("More Expensive Action by percentage: {0} -> {1} %\n".format(key_max, round(max_value, 2)))

    f.close()
Example #22
 def classify(self,features):
     votes=[]
     for c in self._classifier:
         v=c.classify(features)
         votes.append(v)
     votes.append("pos")    
     return mode(votes)
Example #23
def main():
    dailymotion = acquire_dailymotion()
    print("Dailymotion")
    print("total videos: " + str(len(dailymotion[0])))
    print("mean views: " + str(statistics.mean(dailymotion[0])))
    print("median views: " + str(statistics.median(dailymotion[0])))
    print("STD views: " + str(statistics.stdev(dailymotion[0])))
    print("Average Date: " + str(convert_to_datetime(statistics.mean(dailymotion[1]))))
    print("Median Date: " + str(convert_to_datetime(statistics.median(dailymotion[1]))))
    print("Average Lengths: " + str(statistics.mean(dailymotion[2])))
    print("Median Lengths: " + str(statistics.median(dailymotion[2])))
    print("STD Lengths: " + str(statistics.stdev(dailymotion[2])))
    print("Top 20 most used words in title: ")
    word_count_dailymotion("title")
    print("Top 20 most used words in description:")
    word_count_dailymotion("description")
    youtube = acquire_youtube()
    print("YouTube")
    print("total videos: " + str(len(youtube[0])))
    print("mean views: " + str(statistics.mean(youtube[0])))
    print("median views: " + str(statistics.median(youtube[0])))
    print("STD views: " + str(statistics.stdev(youtube[0])))
    print("Average Date: " + str(convert_to_datetime(statistics.mean(youtube[1]))))
    print("Median Date: " + str(convert_to_datetime(statistics.median(youtube[1]))))
    print("Video Definition: ", str(statistics.mode(youtube[2])), " - ", str(youtube[2].count(statistics.mode(youtube[2]))), "/", str(len(youtube[2])))
    print("Average Lengths: " + str(statistics.mean(youtube[3])))
    print("Median Lengths: " + str(statistics.median(youtube[3])))
    print("STD Lengths: " + str(statistics.stdev(youtube[3])))
    print("Top 20 most used words in title: ")
    word_count_yt("title")
    print("Top 20 most used words in description: ")
    word_count_yt("description")
    client.close()
 def confidence(self, features):
     votes = []
     for c in self._classifiers:
         v = c.classify(features)
         votes.append(v)
     choice_votes = votes.count(mode(votes))
     conf = choice_votes / len(votes)
     return conf
Example #25
 def processPackets(packet_cache, strim, rtrim):
     sent_tally = []
     rcvd_tally = []
     analyses = []
     for probe_id,packets in packet_cache:
         try:
             analysis,s,r = analyzePackets(packets, timestamp_precision)
             analysis['probe_id'] = probe_id
             analyses.append(analysis)
             sent_tally.append(s)
             rcvd_tally.append(r)
         except Exception as e:
             #traceback.print_exc()
             sys.stderr.write("WARN: couldn't find enough packets for probe_id=%s\n" % probe_id)
     db.addTrimAnalyses(analyses)
     db.conn.commit()
     return statistics.mode(sent_tally),statistics.mode(rcvd_tally)
Example #26
def data_stat(rate):
    """Print mean, median, mode, standard deviation, max, and min of data."""
    print('Mean:', stat.mean(rate))
    print('Median:', stat.median(rate))
    print('Mode:', stat.mode(rate))
    print('S.D.:', stat.stdev(rate))
    print('Max:', max(rate))
    print('Min:', min(rate))
 def confidence(self, features):
     votes = []
     for c in self._classifiers:
         v = c.classify(features)
         votes.append(v)
     choice_votes = votes.count(mode(votes))
     conf = float(choice_votes) / float(len(votes))
     return format(conf,'.2f')
Example #28
 def confidence(self, features):
     votes = []
     for c in self._classifiers:
         v = c.classify(features)
         votes.append(v)
     choice_votes = votes.count(mode(votes))  # how many occurrences of most popular vote
     conf = choice_votes / len(votes)  # % that was the chosen category
     return conf
Example #29
	def unanimity(self, training_set):
		votes = []
		for c in self.classifiers:
			v = c.classify(training_set)
			votes.append(v)

		choice = votes.count(mode(votes))
		return choice / len(votes)
Example #30
	def get_3p_cookie_stats(self, num_pages, tld_filter = None):
		"""
		determines basic stats for the number of 3p cookies contacted per-page
			note that a single 3p may set more than one cookie

		if tracker_domains have been set the stats will reflect only third-parties
			which have crossed the threshold (see get_tracker_domains())
		"""

		# each page id corresponds to a list of cookie ids
		page_id_to_cookie_id_dict = {}

		# run query to get all page id, 3p cookie id, 3p cookie domain entries
		for row in self.sql_driver.get_page_id_3p_cookie_id_3p_cookie_domain(tld_filter):
			page_id 		= row[0]
			cookie_id		= row[1]
			cookie_domain 	= row[2]

			# if the page id is not yet seen enter the current cookie id as a fresh list
			#	otherwise, we add to the existing list
			# in both cases, if there is a tracker_domain list we do not count cookies
			#	set by domains which are not trackers 
			if page_id not in page_id_to_cookie_id_dict:
				if self.tracker_domains:
					if cookie_domain in self.tracker_domains:
						page_id_to_cookie_id_dict[page_id] = [cookie_id]
				else:
					page_id_to_cookie_id_dict[page_id] = [cookie_id]
			else:
				if self.tracker_domains:
					if cookie_domain in self.tracker_domains:
						page_id_to_cookie_id_dict[page_id] = page_id_to_cookie_id_dict[page_id] + [cookie_id]
				else:
					page_id_to_cookie_id_dict[page_id] = page_id_to_cookie_id_dict[page_id] + [cookie_id]

		# determine the number of 3p cookies each page has by looking at len of list of cookie ids
		per_page_3p_cookie_counts = []
		for page_id in page_id_to_cookie_id_dict:
			per_page_3p_cookie_counts.append(len(page_id_to_cookie_id_dict[page_id]))

		# pages that have no 3p cookies are not yet in our counts
		# so for all uncounted pages we add in zeros
		uncounted_pages = num_pages - len(per_page_3p_cookie_counts)
		while uncounted_pages > 0:
			uncounted_pages -= 1
			per_page_3p_cookie_counts.append(0)

		# mean and median should always be ok
		mean 	= statistics.mean(per_page_3p_cookie_counts)
		median 	= statistics.median(per_page_3p_cookie_counts)

		# but mode can throw an error, so catch here
		try:
			mode = statistics.mode(per_page_3p_cookie_counts)
		except statistics.StatisticsError:
			mode = None

		return(mean, median, mode)
df["Height (cm)"] = df["Height (cm)"].astype(float)

print(df.info())

print(df['Height (cm)'].unique())

print(df['Bowling Style'].unique())

from statistics import mean
df["Height (cm)"].fillna(df["Height (cm)"].mean(), inplace=True)

df.head()

import statistics
from statistics import mode
print(statistics.mode(df['Bowling Style']))

df['Bowling Style'].fillna('Right-arm fast-medium', inplace=True)

print(df.isnull().sum())

from google.colab import files
df.to_csv('421_bowling_missingValues.csv', index=False)
files.download('421_bowling_missingValues.csv')

df["Bowling Style"] = df["Bowling Style"].astype('category')
df["Bowling Style"] = df["Bowling Style"].cat.codes

df["Mat"] = df["Mat"].astype(int)
df["Inns"] = df["Inns"].astype(int)
df["Balls"] = df["Balls"].astype(int)
Example #32
                       'Season', 'Age', 'Dis', 'Trau', 'Inter', 'Fever',
                       'FreqAlc', 'SmokingH', 'SitHours', 'Output'
                   ])
ilosc_danych: int = len(dane)
print(dane)
# --

print("\nCecha ilościowa: Season")
print("""Opis: Sezon, w którym przeprowadzono analizę.
         1) zima,
         2) wiosna,
         3) lato,
         4) jesień.
         (-1, -0,33, 0,33, 1)""")
season = dane.Season
dominant = statistics.mode(season)
licznik = len([1 for i in season if i == dominant])
print("Dominanta: ", dominant)
print("Liczebność: ", licznik)
print("Częstość: ", licznik / ilosc_danych)

print("\nCecha ilościowa: Age")
print("Opis: Wiek w momencie analizy. 18–36 (0, 1)")
age = round(dane.Age * 18) + 18
print("Średnia: ", np.mean(age))
print("Odchylenie standardowe: ", np.std(age))
print("Mediana: ", np.median(age))
print("Maksimum: ", np.amax(age))
print("Minimum: ", np.amin(age))

print("\nCecha jakościowa: IfDiseases")
Example #33
        emotion_label_arg = np.argmax(emotion_classifier.predict(gray_face))
        emotion_text = emotion_labels[emotion_label_arg]
        emotion_window.append(emotion_text)

        rgb_face = np.expand_dims(rgb_face, 0)
        rgb_face = preprocess_input(rgb_face, False)
        gender_prediction = gender_classifier.predict(rgb_face)
        gender_label_arg = np.argmax(gender_prediction)
        gender_text = gender_labels[gender_label_arg]
        gender_window.append(gender_text)

        if len(gender_window) > frame_window:
            emotion_window.pop(0)
            gender_window.pop(0)
        try:
            emotion_mode = mode(emotion_window)
            gender_mode = mode(gender_window)
        except:
            continue

        if gender_text == gender_labels[0]:
            color = (0, 0, 255)
        else:
            color = (255, 0, 0)

        draw_bounding_box(face_coordinates, rgb_image, color)
        draw_text(face_coordinates, rgb_image, gender_mode,
                  color, 0, -20, 1, 1)
        draw_text(face_coordinates, rgb_image, emotion_mode,
                  color, 0, -45, 1, 1)
Example #34
import pandas as pd
import statistics
import csv

df = pd.read_csv("height-weight.csv")
heightlist = df["Height(Inches)"].to_list()
weightlist = df["Weight(Pounds)"].to_list()

heightmean = statistics.mean(heightlist)
heightmedian = statistics.median(heightlist)
heightmode = statistics.mode(heightlist)
heightstdev = statistics.stdev(heightlist)

print(heightmean)
print(heightmedian)
print(heightmode)
print(heightstdev)

firststart = heightmean - heightstdev
firstend = heightmean + heightstdev

secondstart = heightmean - 2*heightstdev
secondend = heightmean + 2*heightstdev

thirdstart = heightmean - 3*heightstdev
thirdend = heightmean + 3*heightstdev

first = [result for result in heightlist if result > firststart and result < firstend]
second = [result for result in heightlist if result > secondstart and result < secondend]
third = [result for result in heightlist if result > thirdstart and result < thirdend]
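A natural follow-up is to report what fraction of the heights falls inside each band, using the lists built above:

print(len(first) / len(heightlist) * 100)   # % within 1 standard deviation
print(len(second) / len(heightlist) * 100)  # % within 2 standard deviations
print(len(third) / len(heightlist) * 100)   # % within 3 standard deviations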
Example #35
def binary_classifier():
    train = pd.read_csv('dataset/train2.tsv', delimiter='\t', encoding='utf-8')
    test = pd.read_csv('dataset/test2.tsv', delimiter='\t', encoding='utf-8')
    x_test = test.iloc[:, [3, 15]]
    x_test = np.asarray(x_test)
    x_test = x_test.tolist()
    X_test = []
    for d in x_test:
        d = str(d[0]) + str(d[1])
        X_test.append(d)

    y_test = test.iloc[:, 2:3]
    y_test = np.asarray(y_test)
    x_train = train.iloc[:, [3, 15]]
    x_train = np.asarray(x_train)
    x_train = x_train.tolist()
    X = []
    for d in x_train:
        d = str(d[0]) + str(d[1])
        X.append(d)

    y_train = train.iloc[:, 2:3]
    y_train = np.asarray(y_train)
    f = open("statement.txt", "r")
    statement = f.read()
    f = open("justification.txt", "r")
    justification = f.read()
    sample_text = statement + justification
    # Input Word Embeddings
    ctv = CountVectorizer(analyzer='word',
                          token_pattern=r'\w{1,}',
                          ngram_range=(1, 3),
                          stop_words='english')
    ctv.fit(list(X) + list(X_test))
    sample = []
    sample.append(str(sample_text))
    xtrain_ctv = ctv.transform(X)
    xtest_ctv = ctv.transform(X_test)
    xsample = ctv.transform(sample)
    y_train_binary = convert_to_bin(y_train)
    y_train_binary = np.asarray(y_train_binary)
    # Output (True:1, False:0)
    le = preprocessing.LabelEncoder()
    y_train_binary = le.fit_transform(y_train_binary)
    print("Logistic Regression")
    logmodel = LogisticRegression()
    logmodel.fit(xtrain_ctv, y_train_binary)

    # NaiveBayes
    print("NB")
    nbmodel = MultinomialNB()
    nbmodel.fit(xtrain_ctv, y_train_binary)

    # Deep Learning Model
    print("GRU")
    # Glove
    embeddings_index = {}
    f = open('glove.42B.300d.txt', encoding='utf8')
    for line in tqdm(f):
        values = line.split()
        word = ''.join(values[:-300])
        coefs = np.asarray(values[-300:], dtype='float32')
        embeddings_index[word] = coefs
    f.close()

    xtrain_glove = [sent2vec(x) for x in tqdm(X)]
    xtest_glove = [sent2vec(x) for x in tqdm(X_test)]

    # Scaling
    scl = preprocessing.StandardScaler()
    xtrain_glove_scl = scl.fit_transform(xtrain_glove)
    xtest_glove_scl = scl.transform(xtest_glove)

    token = text.Tokenizer(num_words=None)
    max_len = 300

    token.fit_on_texts(list(X) + list(X_test))
    xtrain_seq = token.texts_to_sequences(X)
    xvalid_seq = token.texts_to_sequences(X_test)
    xsample_seq = token.texts_to_sequences(sample)

    # zero pad the sequences
    xtrain_pad = sequence.pad_sequences(xtrain_seq, maxlen=max_len)
    xtest_pad = sequence.pad_sequences(xvalid_seq, maxlen=max_len)
    xsample_pad = sequence.pad_sequences(xsample_seq, maxlen=max_len)
    word_index = token.word_index

    embedding_matrix = np.zeros((len(word_index) + 1, 300))
    for word, i in tqdm(word_index.items()):
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

    nn = Sequential()
    nn.add(
        Embedding(len(word_index) + 1,
                  300,
                  weights=[embedding_matrix],
                  input_length=max_len,
                  trainable=False))
    nn.add(SpatialDropout1D(0.3))
    nn.add(GRU(300, dropout=0.3, recurrent_dropout=0.3, return_sequences=True))
    nn.add(GRU(300, dropout=0.3, recurrent_dropout=0.3))
    nn.add(Dense(1024, activation='relu'))
    nn.add(Dropout(0.8))
    nn.add(Dense(1024, activation='relu'))
    nn.add(Dropout(0.8))
    nn.add(Dense(1, activation='sigmoid'))
    nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
    nn = load_model('gru_bin.h5')

    # Ensemble
    print("Ensemble")
    pred1_test = logmodel.predict(xsample)
    pred2_test = nbmodel.predict(xsample)
    pred3_test = nn.predict(xsample_pad)

    pred3_bin = []
    for i in pred3_test:
        if i >= 0.55:
            pred3_bin.append(1)
        else:
            pred3_bin.append(0)

    data = [pred1_test[0], pred2_test[0], pred3_bin[0]]
    output = mode(data)
    labels = list(le.inverse_transform([0, 1]))
    output_file = open("binary_output.txt", "w")
    output_file.write("Output is " + str(output))
    output_file.write("\n")
    output_file.write("0,1 correspond to " + str(labels) + " respectively")
    output_file.close()
Example #36
 def Calcular_Mode(self, pdValores):
     nbrMode = stats.mode(pdValores)
     return nbrMode
Example #37
            except:
                continue

            gray_face = preprocess_input(gray_face, True)
            gray_face = np.expand_dims(gray_face, 0)
            gray_face = np.expand_dims(gray_face, -1)
            emotion_prediction = emotion_classifier.predict(gray_face)
            emotion_probability = np.max(emotion_prediction)
            emotion_label_arg = np.argmax(emotion_prediction)
            emotion_text = emotion_labels[emotion_label_arg]
            emotion_window.append(emotion_text)

            if len(emotion_window) > frame_window:
                emotion_window.pop(0)
            try:
                emotion_mode = mode(emotion_window)
            except:
                continue
            if emotion_text == 'angry':
                color = emotion_probability * np.asarray((255, 0, 0))
            elif emotion_text == 'sad':
                if freq['min'] < -0.13 and freq['max'] > 0.2:
                    print("CRYING")
                    break
                color = emotion_probability * np.asarray((0, 0, 255))

            elif emotion_text == 'happy':
                color = emotion_probability * np.asarray((255, 255, 0))
            elif emotion_text == 'surprise':
                color = emotion_probability * np.asarray((0, 255, 255))
            else:
import csv
import statistics

with open('car_data.csv') as csv_file:
    reader = csv.reader(csv_file)
    for cat in reader:
        prices = [int(price[1:].replace(',', ''))
                  for i, row in enumerate(reader)
                  if i == 0
                  for price in row[1:]
                  if price != 'N/A' and price != '']

    mean = statistics.mean(prices)
    median = statistics.median(prices)
    mode = statistics.mode(prices)

    print(f'Mean: {mean}, Median: {median}, Mode: {mode}')
import random
import plotly.express as px
import plotly.figure_factory as ff
import statistics

dice_result = []

for i in range(0, 1000):
    dice1 = random.randint(1, 6)
    dice2 = random.randint(1, 6)
    dice_result.append(dice1 + dice2)

mean = sum(dice_result) / len(dice_result)
print(mean)
median = statistics.median(dice_result)
print(median)
mode = statistics.mode(dice_result)
print(mode)
std_deviation = statistics.stdev(dice_result)
print(std_deviation)

first_std_deviation_start, first_std_deviation_end = mean - std_deviation, mean + std_deviation
second_std_deviation_start, second_std_deviation_end = mean - (
    2 * std_deviation), mean + (2 * std_deviation)
third_std_deviation_start, third_std_deviation_end = mean - (
    3 * std_deviation), mean + (3 * std_deviation)

list_of_data_within_1_std_deviation = [
    result for result in dice_result
    if result > first_std_deviation_start and result < first_std_deviation_end
]
list_of_data_within_2_std_deviation = [
Example #40
 def mode(self):
     return statistics.mode(self.numbers)
def run(workDir):
    allLines = ""
    with open(workDir+"/wordsList.txt",mode="r") as input:
        allLines = input.read()

    allWords = allLines.split("\n")

    if not os.path.exists(workDir+"/wordStats"):
        os.system('mkdir '+workDir+"/wordStats")

    for word in allWords:
        if len(word) == 0 or len(word) > 250:
            continue
        if not os.path.exists(workDir+"/words/"+word+".csv"):
            continue
        with open(workDir+"/words/"+word+".csv",mode="r") as input:
            allLines = input.read()

        lines = allLines.split("\n")
        scoreHash = {}
        scoreArr = []
        for line in lines:
            arr = line.split(",")
            if len(arr) != 6:
                continue
            if float(arr[1]) == 0:
                continue
            if float(arr[5]) not in scoreHash:
                scoreHash[float(arr[5])] = []
                scoreArr.append(float(arr[5]))
            scoreHash[float(arr[5])].append(float(arr[1]))

        scoreArr.sort()
        targets = []
        set = []
        i = -1.0
        allOutput = []
        while i < 1:
            targets = []
            set = []
            for j in range(0,len(scoreArr)):
                if scoreArr[j] >= i-0.05 and scoreArr[j] < i+0.05:
                    targets.append(scoreArr[j])
            for j in range(0,len(targets)):
                for k in range(0,len(scoreHash[targets[j]])):
                    set.append(scoreHash[targets[j]][k])
            if len(set) == 0:
                allOutput.append(str(round(i,1))+",0,0,0,0,0,0")
            elif len(set) == 1:
                allOutput.append(str(round(i,1))+","+str(len(set))
                    +","+str(statistics.mean(set))
                    +","+str(statistics.median(set))
                    +",0,0,"+str(statistics.mode(set)))
            else:
                modeVals = mode(set)
                modeVal = max(modeVals.mode)
                allOutput.append(str(round(i,1))+","+str(len(set))
                    +","+str(statistics.mean(set))
                    +","+str(statistics.median(set))
                    +","+str(statistics.stdev(set))
                    +","+str(skew(set))
                    +","+str(modeVal) )
            i += 0.1
        with open(workDir+"/wordStats/"+word+".stats.csv",mode="w") as output:
            output.write("\n".join(allOutput))
Example #42

func19(numbs)

#21
list=[]
for x in range(10):
    list.append(random.randint(25,110))
print(list)

#24
random.shuffle(numbs)
print("shuffle ",numbs)

#27
list27=[1, 5, 23, 5, 12, 2, 5, 1, 18, 5]
print("mode ",statistics.mode(list27))

#29
st="python php pascal javascript java c++"
list29=st.split()
print(list29)
print(max(list29, key=len))

#30
list=[]
list27=[1, 5, 23, 5, 12, 2, 5, 1, 18, 5]
list27.sort()
print(sum(list27)/len(list27))
print(statistics.median(list27))
print(statistics.mode(list27))
Example #43
    def descriptiveAnalysis(self, x, isSample=True):
        '''Performs basic analysis on a data set,
           calculates mean, median, standard deviation, etc.

           Inputs
           -------
           x : numpy.array object
               The dataset
           isSample : Boolean (True/False)
               Some statistical calculations depend upon whether
               the data is sample or population data
           showOutput : Boolean (True/False)
               Whether or not to print out the resulting statistics.
               Otherwise, the results will only be returned
               as a dictionary.

           Outputs
           -------
           stats : dictionary
               Dictionary containing the calculated statistics
        '''
        self.data = x

        # sample size
        self.size = x.size

        # range data
        self.min = np.min(x)
        self.max = np.max(x)
        self.range = self.max - self.min

        # Quartiles
        self.q1 = np.percentile(x, 25)
        self.q2 = np.percentile(x, 50)
        self.q3 = np.percentile(x, 75)
        self.interquartileRange = self.q3 - self.q1

        self.mean = np.mean(x)
        # Mode (most common number) is a robust measure of central location
        # for nominal level data
        try:
            self.mode = mode(x)
        except StatisticsError as e:
            logging.exception(e)
            # TODO what to do in this scenario? Need to set mode

        # The median is a robust measure of central location for ordinal level
        # data, and is less affected by the presence of outliers in your data.
        # When the number of data points is odd, the middle data point is
        # returned. When the number of data points is even, the median is
        # interpolated by taking the average of the two middle values
        #
        # This is suited for when your data is discrete, and you don’t mind
        # that the median may not be an actual data point.
        #
        # If your data is ordinal (supports order operations) but not numeric
        # (doesn’t support addition), you should use median_low() or
        # median_high() instead.
        self.median = np.median(x)

        if isSample:
            ## Sample Data
            self.stdev = stdev(x)
            # Variance, or second moment about the mean, is a measure of the
            # variability (spread or dispersion) of data. A large variance
            # indicates that the data is spread out; a small variance indicates
            # it is clustered closely around the mean.
            self.variance = variance(x)  # == stdev**2
        else:
            ## Population Data
            self.stdev = pstdev(x)
            self.variance = pvariance(x)

        # Pearson's second skewness coefficient (median skewness)
        self.skewCoefficient = 3 * (self.mean - self.median) / self.stdev
        return self
                # Using cv2.putText() method
                # frame = cv2.putText(frame, 'Left', org, font, fontScale, color, thickness, cv2.LINE_AA)
                leftwidth.append(w)
                leftheight.append(h)
            elif x > b / 2:
                # frame = cv2.putText(frame, 'Right', org, font, fontScale, color, thickness, cv2.LINE_AA)
                rightwidth.append(w)
                rightheight.append(h)
cap.release()

if b in leftwidth:
    leftwidth.remove(b)
if a in leftheight:
    leftheight.remove(a)

lw = statistics.mode(leftwidth)
lh = statistics.mode(leftheight)

if b in rightwidth:
    rightwidth.remove(b)
if a in rightheight:
    rightheight.remove(a)

rw = statistics.mode(rightwidth)
rh = statistics.mode(rightheight)

avgw = (lw + rw) / 2
avgh = (lh + rh) / 2

print(int(avgw), int(avgh))
# t = int(time.time())*1000
Example #45
}, {
    "$sort": {
        "avgCount": -1
    }
}]
sumByZip = db.crimes.aggregate(pipeline)

averages = []

for zipEntry in sumByZip:
    print(zipEntry["_id"], "=", zipEntry["avgCount"])
    averages.append(zipEntry["avgCount"])

medianValue = statistics.median(averages)
avg = statistics.mean(averages)
mode = statistics.mode(averages)
stdev = statistics.stdev(averages, medianValue)

print "=============================="
print "mean/avg crimes per zipcode", avg
print "median crimes per zipcode", medianValue
print "mode crimes per zipcode", mode
print "stdev crimes per zipcode", stdev

# db.Listing.find().forEach(function(item){
#     db.Listing.update({_id: item._id}, {$set: { LowerCaseAddress: item.Address.toLowerCase() }})
# })


def read_file(filename):
    with open(filename, 'r') as f:
Example #46
    skewness.shape[0]))

# Now let's apply the box-cox transformation to correct for skewness
skewed_features = skewness.index
lam = 0.15
for feature in skewed_features:
    all_data[feature] = boxcox1p(all_data[feature], lam)

# Creating a new feature: Total Square Footage
all_data['TotalSF'] = all_data['TotalBsmtSF'] + all_data[
    '1stFlrSF'] + all_data['2ndFlrSF']

# Identifying features where a class is over 97% represented
low_var_cat = [
    col for col in all_data.select_dtypes(exclude=['number'])
    if 1 - sum(all_data[col] == mode(all_data[col])) / len(all_data) < 0.03
]
low_var_cat

# Dropping these columns from both datasets
all_data = all_data.drop(
    ['Street', 'Utilities', 'Condition2', 'RoofMatl', 'Heating', 'PoolQC'],
    axis=1)

# List of columns to Label Encode
cols = ('FireplaceQu', 'BsmtQual', 'BsmtCond', 'GarageQual', 'GarageCond',
        'ExterQual', 'ExterCond', 'HeatingQC', 'KitchenQual', 'BsmtFinType1',
        'BsmtFinType2', 'Functional', 'Fence', 'BsmtExposure', 'GarageFinish',
        'LandSlope', 'LotShape', 'PavedDrive', 'Alley', 'CentralAir',
        'MSSubClass', 'OverallCond', 'YrSold', 'MoSold')
Example #47
def func(video_path):
    # file to store metadata
    metaData = open(
        'C:/Users/ASUS/Desktop/Face Recognition/trial1/Face Detection and Emotion Analysis/src/final1.csv',
        'a')
    writer = csv.writer(metaData)

    # parameters for loading data and images
    detection_model_path = '../trained_models/detection_models/haarcascade_frontalface_default.xml'
    emotion_model_path = '../trained_models/emotion_models/fer2013_mini_XCEPTION.102-0.66.hdf5'
    emotion_labels = get_labels('fer2013')

    # hyper-parameters for bounding boxes shape
    frame_window = 10
    emotion_offsets = (20, 40)

    # loading models
    face_detection = load_detection_model(detection_model_path)
    emotion_classifier = load_model(emotion_model_path, compile=False)

    # getting input model shapes for inference
    emotion_target_size = emotion_classifier.input_shape[1:3]

    # starting lists for calculating modes
    emotion_window = []

    toc = time.time()
    # starting video streaming
    cv2.namedWindow('window_frame')
    #video_capture = cv2.VideoCapture(sys.argv[1])
    video_capture = cv2.VideoCapture(video_path)
    #video_capture = cv2.VideoCapture('videoplayback.mp4')

    while True:
        bgr_image = video_capture.read()[1]
        gray_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
        rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        faces = detect_faces(face_detection, gray_image)

        frame_count = int(video_capture.get(cv2.CAP_PROP_POS_FRAMES))

        tic = time.time()

        for face_coordinates in faces:

            x1, x2, y1, y2 = apply_offsets(face_coordinates, emotion_offsets)
            gray_face = gray_image[y1:y2, x1:x2]
            try:
                gray_face = cv2.resize(gray_face, (emotion_target_size))
            except:
                continue

            actor_face = cv2.resize(gray_face, (128, 128))
            cv2.imwrite(
                "E:/tensorflow-master/tensorflow/examples/image_retraining/face.jpg",
                actor_face)

            video_capture.set(1, int(frame_count))
            ret, frame = video_capture.read()
            cv2.imwrite(
                "E:/Object Detection/models-master/tutorials/image/imagenet/object.jpg",
                gray_image)

            gray_face = preprocess_input(gray_face, True)
            gray_face = np.expand_dims(gray_face, 0)
            gray_face = np.expand_dims(gray_face, -1)
            emotion_prediction = emotion_classifier.predict(gray_face)
            emotion_probability = np.max(emotion_prediction)
            emotion_label_arg = np.argmax(emotion_prediction)
            emotion_text = emotion_labels[emotion_label_arg]
            emotion_window.append(emotion_text)

            s2_out = subprocess.check_output([
                sys.executable,
                "E:/tensorflow-master/tensorflow/examples/label_image/label_image.py",
                "--graph=E:/tmp/output_graph.pb",
                "--labels=E:/tmp/output_labels.txt", "--input_layer=Mul",
                "--output_layer=final_result", "--input_mean=128",
                "--input_std=128",
                "--image=E:/tensorflow-master/tensorflow/examples/image_retraining/face.jpg"
            ])
            actor_confidence = s2_out.split()[1]
            if (float(actor_confidence) > 0.5):
                actor = s2_out.split()[0]
            else:
                actor = ""

            print(s2_out)

            s3_out = subprocess.check_output([
                sys.executable,
                "E:/Object Detection/models-master/tutorials/image/imagenet/classify_image.py",
                "--image_file=E:/Object Detection/models-master/tutorials/image/imagenet/object.jpg"
            ])
            object1 = s3_out.split()[0]
            print(s3_out)

            writer.writerows([[(tic - toc), frame_count, emotion_text,
                               emotion_probability, actor, actor_confidence,
                               face_coordinates, object1]])

            if len(emotion_window) > frame_window:
                emotion_window.pop(0)
            try:
                emotion_mode = mode(emotion_window)
            except:
                continue

            if emotion_text == 'angry':
                color = emotion_probability * np.asarray((255, 0, 0))
            elif emotion_text == 'sad':
                color = emotion_probability * np.asarray((0, 0, 255))
            elif emotion_text == 'happy':
                color = emotion_probability * np.asarray((255, 255, 0))
            elif emotion_text == 'surprise':
                color = emotion_probability * np.asarray((0, 255, 255))
            else:
                color = emotion_probability * np.asarray((0, 255, 0))

            color = color.astype(int)
            color = color.tolist()

            draw_bounding_box(face_coordinates, rgb_image, color)
            draw_text(face_coordinates, rgb_image, emotion_mode, color, 0, -20,
                      1, 1)
            draw_text(face_coordinates, rgb_image, actor, color, 0, -45, 1, 1)

        bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
        cv2.imshow('window_frame', bgr_image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
Example #48
    bledne_data = bledny_input_file.read()

bledne_data = bledne_data.splitlines()

bledne_sequences = []
for i in range(0, len(bledne_data), 2):
    bledne_sequences.append([bledne_data[i], bledne_data[i + 1]])

bledne_wyrazy = []

for i in bledne_sequences:
    bledne_roznice = []
    bledne_sequence = i[1]
    bledne_sequence = [int(x) for x in bledne_sequence.split()]
    most_common_difference = 0

    for j in range(len(bledne_sequence) - 1):
        bledne_roznice.append(bledne_sequence[j + 1] - bledne_sequence[j])
    most_common_difference = mode(bledne_roznice)

    for j, roznica in enumerate(bledne_roznice):
        if roznica != most_common_difference:
            if j == 0:
                bledne_wyrazy.append(bledne_sequence[0])
                break
            else:
                # in theory this could go out of bounds, but in practice it cannot: if this element is wrong, the previous one would already be wrong
                bledne_wyrazy.append(bledne_sequence[j + 1])
                break
print(bledne_wyrazy)
Example #49
    median = (len // 2)
    median = X[median]
else:
    a = len // 2
    median1 = X[a]
    median2 = X[a - 1]
    median = (median1 + median2) / 2

#find the mode
most_frequent = 0
num = 0
for i in X:
    fre = X.count(i)
    if (most_frequent < fre):
        most_frequent = fre
        num = i
print(mean)
print(median)
print(num)
# using the statistics module
from statistics import mean, median, mode

#find the mean
mean = mean(X)

#find the median
median = median(X)

#find the mode
mode = mode(X)
Example #50
            continue

        # Save figures and targets
        np.save(f'{path_dest}original/{i}_{n_ndl}.npy', orig_small)
        np.save(f'{path_dest}inpainted/{i}_{n_ndl}.npy', last_small)
        np.savez_compressed(f'{path_dest}mask/{i}_{n_ndl}', mask_small)
        names_to_save.append(f'{i}_{n_ndl}')

        # Get the malignancy score
        malignancy_original = df_one_nodule.malignancy.values
        malignancies_original.append(malignancy_original)
        malignancy = list(map(transform_malignancy, malignancy_original))
        malignancy = list(filter(None, malignancy))
        malignancies.append(malignancy)
        try:
            malignancy_mode = mode(malignancy)
            malignancies_mode.append(malignancy_mode)
            # Next lines are to append to malignancies_mode_3_agree (if at least 3 reviewers agree on malignancy)
            agree_with_mode = [
                1 if malignancy_mode == i else 0 for i in malignancy
            ]
            agree_with_mode = np.sum(agree_with_mode)
            if agree_with_mode >= 3:
                malignancies_mode_3_agree.append(malignancy_mode)
                names_to_save_3_agree.append(f'{i}_{n_ndl}')
        except StatisticsError:
            continue

        # These coords can be used to 'plot_block_and_cube'
#         coords_Z_small = coords_Z - z_min_f
#         coords_X_small = coords_X - x_min_f
k = 5

print(x1,x2,y)

nx1 = 8.093607
nx2 = 3.3365732
ddistance = []

for i in range(10):
    A = np.sum((x1[i]-nx1) ** 2 + (x2[i]-nx2) ** 2)
    Distance = np.sqrt(A)
    ddistance.append(Distance)


# simple bubble sort, ascending by distance so the k nearest neighbours come first
for i in range(len(ddistance)):
    for j in range(len(ddistance)-i-1):
        if ddistance[j]>ddistance[j+1]:
            ddistance[j],ddistance[j+1]=ddistance[j+1],ddistance[j]
            y[j],y[j+1]=y[j+1],y[j]
print("Distance",ddistance)
print("Y",y)


nbr=list()
for i in range(k):
    nbr.append(y[i])
print(nbr)
knn = mode(nbr)
print(knn)

Example #52
def part_1(input_list):
    sleep_dict, minute_dict = parse_log(input_list)
    guard = sorted(sleep_dict.items(), key=lambda x: x[1])[-1][0]
    return guard * mode(minute_dict[guard])
Example #53
import math
# length of a diagonal
l = 4
w = 10 
d = math.sqrt(l**2 + w**2)
print(d)

print(math.pow(2, 3))

import random
print(random.randint(0,100))


import statistics
nums = [1, 5, 33, 12, 46, 33, 2]
print(statistics.mean(nums))
print(statistics.median(nums))
print(statistics.mode(nums))

import keyword
print(keyword.iskeyword("for"))
print(keyword.iskeyword("football"))

import hello
print(hello.print_hello())

import os
print(os.path.join("Users", "bob", "st.txt"))


Example #54
my_list = [1, 2, 5, 700, 300000]

# Find and print the average of num_list (2pts)
print(sum(num_list) / len(num_list))
# Remove the lowest number from num_list (2pt)
num_list.sort()
del num_list[0]

print(num_list)
# Create and print a new list called top_ten which contains only the 10 highest numbers in num_list(2pts)
top_ten = []

top_ten = num_list
top_ten.sort()

del top_ten[:-10]
print(top_ten)

# PROBLEM 4 (4pts)
# Find the number which appears most often in num_list?
print(mode(num_list))

# CHALLENGE PROBLEMS (2pts)
# TOUGH PROBLEMS, BUT FEW POINTS

# Find the number of prime numbers in num_list?
# Hint: One way is to just start removing the ones that aren't

# Find the number of palindromes
# Hint: This may be easier to do with strings
Example #55
 def mode(cls, numbers):
     return statistics.mode(numbers)
# Project Euler Solutions: Problem 059
# Copyright (c) noicepollution. All Rights Reserved.
# Solution timestamp: 00:39, 18 March 2020
# https://github.com/noicepollution/project-euler

from statistics import mode

inp_str = open('problem59.txt').read().split(',')
inp_str = [int(i) for i in inp_str]
lsts = []
for i in range(0, 3):
    lst = []
    for j in range(i, len(inp_str), 3):
        lst.append(inp_str[j])
    lsts.append(lst)

freqs = [mode(i) for i in lsts]
key = [i ^ 32 for i in freqs]
res = sum([inp_str[i] ^ key[i % 3] for i in range(0, len(inp_str))])
print(res)
Example #57
xValuesMean = statistics.mean(dataListedSeriesA)
print("The mean is ", xValuesMean)

xValuesMedian = statistics.median(dataListedSeriesA)
print("The median is ", xValuesMedian)

xValuesVari = statistics.variance(dataListedSeriesA)
print("The variance is ", xValuesVari)

xValuesHar = statistics.harmonic_mean(dataListedSeriesA)
print("The Harmonic Mean is ", xValuesHar)

# The mode can sometimes have an error if there is more than one most common number
try:
    xValuesMode = statistics.mode(dataListedSeriesA)
    print("The Mode is ", xValuesMode)

except statistics.StatisticsError:
    print("Plot twist...THERE IS NO MODE!")
"""
The csv used for this example:

2013,4,5   
2013,4,5
2014,6,7
2015,8,8
2016,15,9
2017,15,10
"""
Example #58
 def classify(self, features):
     votes = []
     for c in self.classifiers:
         v = c.classify(features)
         votes.append(v)
     return mode(votes)
def test_mode(series):
    assert eq(
        c.aggregate(c.ReduceFuncs.Mode(c.item(0))).execute(series),
        statistics.mode(x[0] for x in series),
    )
Example #60
    i += 1
    print('Columna: %d' % i)
    for row in range(a[0] - 1):
        j += 1
        if i * j % random.randint(
                1000, 2000) == 0:  # Generates few missing values per feature
            X[j, i] = ''

### 2. HANDLING MISSING DATA   ################################################

#   2.1 Replacing missing data in categorical variable my most common value
from statistics import mode

for row in range(a[0] - 1):  #   3.2 Handling missing data in Geography
    if X[row, 4] == '':
        X[row, 4] = mode(X[:, 4])

for row in range(a[0] - 1):  #   3.3 Handling missing data in Gender
    if X[row, 5] == '':
        X[row, 5] = mode(X[:, 5])

for row in range(a[0] - 1):  #   3.4 Handling missing data in HasCrCard
    if X[row, 10] == '':
        X[row, 10] = mode(X[:, 10])

for row in range(a[0] - 1):  #   3.5 Handling missing data in IsActive
    if X[row, 11] == '':
        X[row, 11] = mode(X[:, 11])

#   2.2 Replacing missing data in countinous variable with mean of column
from sklearn.preprocessing import Imputer
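Note that sklearn.preprocessing.Imputer has been removed from current scikit-learn releases; the equivalent mean imputation now lives in sklearn.impute. A sketch under that assumption, using illustrative data rather than the X array above (which marks missing values with '' instead of np.nan):

import numpy as np
from sklearn.impute import SimpleImputer

cont = np.array([[1.0], [np.nan], [3.0]])  # a continuous column with one missing value
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
cont = imputer.fit_transform(cont)  # the nan is replaced by the column mean, 2.0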