Beispiel #1
0
def clustering():
    """Open the clustering window and reset the related module-level flags.

    A new ``Toplevel`` child of ``root`` is created and handed to
    ``cluster.main``; afterwards the globals ``clustering``, ``flag`` and
    ``flag_for_matrix`` are set to 1, 0 and 0 respectively.
    """
    # NOTE(review): ``clustering`` is both this function's name and one of
    # the globals assigned below, so the first call rebinds the module-level
    # name to the integer 1 -- confirm callers keep their own reference.
    global clustering, flag, flag_for_matrix
    window = Toplevel(root)
    cluster.main(window)
    clustering, flag, flag_for_matrix = 1, 0, 0
Beispiel #2
0
def main():
    """Run the collect, cluster and classify stages and write two reports.

    ``summary.txt`` receives the figures returned by ``collect.main()``
    (two values), ``cluster.main()`` (one value) and ``classify.main()``
    (four values: two counts and two examples).  ``description.txt``
    receives a fixed prose description of the study.

    Both files are written as UTF-8 and are opened with context managers so
    they are closed even if one of the writes raises.
    """
    longitud, number = collect.main()
    average = cluster.main()
    positive, negative, ejemplo1, ejemplo2 = classify.main()

    with open("summary.txt", "w", encoding="utf-8") as summary:
        summary.write("Number of users collected: %d\n" % longitud)
        summary.write("Number of messages collected: %d\n" % number)
        # NOTE(review): %d truncates a fractional average -- confirm that an
        # integer is what the report is meant to show.
        summary.write("Average number of users per community: %d\n" % average)
        summary.write("Number of instances per class found: %d, %d\n" %
                      (positive, negative))
        summary.write("%s\n" % str(ejemplo1))
        summary.write("%s\n" % str(ejemplo2))

    # Stray zero-width characters that sat inside "values for SP500" in the
    # second paragraph have been dropped so they no longer end up in the file.
    with open("description.txt", "w", encoding="utf-8") as description:
        description.write(
            "Study of the impact caused by a tweet from a friend of DowJones in the stock market.\n"
        )
        description.write(
            "I do a research on TOP 100 friends on Twitter of DowJones account.\n First, I downloaded the values for SP500 for a couple of weeks each minute from finance.google.com.\n With each tweet from DowJones friends during that time, I saw the impact of that tweet on the stock market, calculating the value of the stock market at that time and subtract the value of the stock within 5 minutes of difference. Then I classified as positive if the subtraction is positive and say that type of tweet has a positive impact.\n Otherwise, if the subtraction is negative, the impact of the tweet had negative impact.\n"
        )
        description.write(
            "In the classifier, with the training tweets I made a cluster of words with 10 means.\n The number of words in each cluster is one of the features that I have entered in my classifier. Another feature is the time each tweet was published.\n With this I have created a classifier that predicts the impact that the tweet of analysts will have on the stock market in SP500.\n"
        )
        description.write(
            "In the clusters, I have seen the number of communities that can exist. I have found that there are small communities.\n But at no time does it become a single community, like all stock analysts together in one cluster. Moreover I saw where each of the analysts friends was located. I have observed that it is not only the United States, the main country in the clusters, also we have United Kingdom, Iran or even Australia have some importance in these clusters.\n"
        )
Beispiel #3
0
def main(thispath,thisrate):
    file1 = open('1.txt','w')

    #record the time costed for calculating the LCS between every two samples
    start = time.time()
    sum_of_file = cal_lcs.main(path = thispath)  #call the function cal_lcs.main to sovle the LCS problem
    end = time.time()
    print " LCS's time :" , end-start

    #record the time costed for calculating the distance between every two samples by the LCS
    start = time.time()
    cal_dis.main(sum_of_file)  #calculate the distance between every two samples by the LCS
    end = time.time()
    print " Calculat distance matrix time :" , end-start

    #record the time costed for operating the clustering algorithm
    start = time.time()
    num_dict,num_set = cluster.main(rate = thisrate)  # call the clustering algorithm
    end = time.time()
    print " Cluster time :" , end-start


    for key in num_dict:
        file1.write(str(num_dict[key])+'\n')
    print len(num_dict)
Beispiel #4
0
import csv

if __name__ == '__main__':
    # Command-line entry point: load spambase/spambase.data, turn the last
    # column of every row into an integer label index and pass everything to
    # cluster.main together with the parsed options.
    parser = argparse.ArgumentParser()
    parser.add_argument('-m',
                        '--metric',
                        default='avg',
                        help='Can be one of {avg, max, min}')
    parser.add_argument('-o',
                        '--out_file',
                        default='result.pkl',
                        help='Pickle file to store the result')
    parser.add_argument('-n',
                        '--num_workers',
                        type=int,
                        default=1,
                        help='Number of workers')
    args = parser.parse_args()
    path = 'spambase/spambase.data'
    data = []
    target = []
    # Close the data file deterministically instead of leaking the handle.
    with open(path) as handle:
        reader = csv.reader(handle, delimiter=',')
        for row in reader:
            if not row:
                # csv.reader yields an empty list for blank lines; indexing
                # row[-1] on it would raise IndexError.
                continue
            data.append(row[:-1])
            target.append(row[-1])
    data = np.array(data, dtype=float)
    # Sort the distinct labels so the label -> index mapping is the same on
    # every run; iterating a bare set of strings follows hash order, which
    # can differ between interpreter runs.
    labels = sorted(set(target))
    label_to_idx = {v: i for i, v in enumerate(labels)}
    target = np.array([label_to_idx[i] for i in target], dtype=int)
    cluster.main(data, target, args.metric, args.out_file, args.num_workers)
Beispiel #5
0
import cluster
from sklearn.datasets import load_iris
import argparse


if __name__ == '__main__':
    # Script entry point: parse the command-line options, load the iris
    # dataset and pass its data/target arrays to cluster.main.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-m', '--metric',
        default='avg',
        help='Can be one of {avg, max, min}',
    )
    arg_parser.add_argument(
        '-o', '--out_file',
        default='result.pkl',
        help='Pickle file to store the result',
    )
    arg_parser.add_argument(
        '-n', '--num_workers',
        type=int,
        default=1,
        help='Number of workers',
    )
    args = arg_parser.parse_args()

    iris = load_iris()
    data, target = iris['data'], iris['target']
    cluster.main(
        data,
        target,
        args.metric,
        args.out_file,
        args.num_workers,
    )
Beispiel #6
0
def main():
    """Run the three pipeline stages with stdout captured in ./summary.txt.

    ``collect.main``, ``cluster.main`` and ``classify.main`` are called in
    that order while ``sys.stdout`` points at the summary file.  The previous
    ``sys.stdout`` is always restored afterwards, including when a stage
    raises; otherwise ``sys.stdout`` would be left pointing at a file that
    the ``with`` block has already closed.
    """
    previous_stdout = sys.stdout
    with open('./summary.txt', 'w') as f:
        sys.stdout = f
        try:
            collect.main()
            cluster.main()
            classify.main()
        finally:
            # Restore before the file is closed so nothing can write to a
            # closed handle.
            sys.stdout = previous_stdout
Beispiel #7
0
import collect
import cluster
import classify

# Run the three stages; each one leaves its results in module attributes
# that are read below.
collect.main()
cluster.main()
classify.main()

filename = "summary.txt"
# A context manager closes the report even if one of the attribute lookups
# or writes fails, and the handle no longer uses the builtin-shadowing name
# ``file``.
with open(filename, 'w') as summary_file:
    summary_file.write('Number of users collected: ' + str(collect.a) + '\n')
    summary_file.write('Number of messages collected: ' + str(collect.b) + '\n')
    summary_file.write('Number of communities discovered: ' + str(cluster.c) + '\n')
    summary_file.write('Average number of users per community: ' + str(cluster.d) + '\n')
    summary_file.write('Number of instances per class found: ' + str(classify.out) + '\n')
    # NOTE(review): the first example is read from ``cluster`` while the
    # other two come from ``classify`` -- confirm ``cluster.instance1`` is
    # the intended source.
    summary_file.write('One example from each class: ' + str(cluster.instance1) + '\n' +
                       str(classify.instance2) + '\n' + str(classify.instance3) + '\n')
    def test_cluster(self):
        # Regression check: cluster.main(["data/jeans.pkl"], True) must return
        # exactly the mapping below (unicode key -> list of unicode strings).
        # The lists are compared with assertEqual, so element order matters.
        # NOTE(review): each list appears to contain its own key as one of
        # its elements -- confirm that this is a guaranteed output property.
        result = {u'Angels Jeanswear': [u'Embroidery',
                                       u'Diamond',
                                       u'Rectangle',
                                       u'Bead',
                                       u'Angels Jeanswear'],
                 u'Baggies (clothing)': [u'Zip',
                                         u'Pocket',
                                         u'Blacklight paint',
                                         u'Minneapolis',
                                         u'Terrycloth',
                                         u'Fashion accessory',
                                         u'Baggies (clothing)'],
                 u'Bell-bottoms': [u'Bell-bottoms', u'Knee', u'Trousers'],
                 u'Blue Blood Denim': [u'Harvey Nichols', u'Blue Blood Denim', u'Blue blood'],
                 u'Carpenter jeans': [u'Carpenter', u'Carpenter jeans', u'Human leg'],
                 u'Chip and Pepper': [u'Identical twin',
                                      u'Animated television series',
                                      u'NBC',
                                      u'Chip and Pepper'],
                 u'Denim Day': [u'Denim Day', u'Sexual assault', u'Rape'],
                 u'Denim skirt': [u'Orthodox Jew',
                                  u'Fly (clothing)',
                                  u'Pentecostal',
                                  u'Teenager',
                                  u'Denim skirt',
                                  u'Women wearing pants',
                                  u'Back closure',
                                  u'Muslim',
                                  u'Mennonite',
                                  u'Natasha Bedingfield',
                                  u'Miniskirt'],
                 u'Diesel (brand)': [u'Laverda',
                                     u'Breganze',
                                     u'Renzo Rosso',
                                     u'Molvena',
                                     u'Pr\xeat-\xe0-porter',
                                     u'Diesel (brand)'],
                 u'Donna Ida': [u'Australia',
                                u'Donna Ida',
                                u'Drapers',
                                u'Mih jeans',
                                u'Stella McCartney',
                                u'7 for all Mankind',
                                u'Victoria Beckham',
                                u'Jeans for Genes',
                                u'Made in Chelsea',
                                u'London'],
                 u'Dorinha Jeans Wear': [u'Hip',
                                         u'British Columbia',
                                         u'Lingerie',
                                         u'Low-rise jeans',
                                         u'Swimwear',
                                         u'Hip (anatomy)',
                                         u'Low-rise (fashion)',
                                         u'Buttocks',
                                         u'Hip-huggers',
                                         u'Leg',
                                         u'Thigh',
                                         u'Dorinha Jeans Wear',
                                         u'Vancouver'],
                 u'Edwin (clothing retailer)': [u'Model (person)',
                                                u'Brad Pitt',
                                                u'Brand',
                                                u'Arakawa, Tokyo',
                                                u'Japanese yen',
                                                u'Spokesman',
                                                u'Edwin (clothing retailer)',
                                                u'Revenue',
                                                u'Japan'],
                 u'Emma, la polilla fashion': [u'Adidas',
                                               u'Advertisement',
                                               u'Emma, la polilla fashion',
                                               u'Glasses',
                                               u'Athletic shoe',
                                               u'Advertising',
                                               u"Levi's",
                                               u'Rag doll',
                                               u'Argentina',
                                               u'Moth'],
                 u'GAT (jeans)': [u'Skateboarding',
                                  u'Rave',
                                  u'GAT (jeans)',
                                  u'JNCO',
                                  u'Acronym',
                                  u'California'],
                 u'GLO Jeans': [u'GLO Jeans', u'Jones Apparel Group'],
                 u'Gas jeans': [u'Privately held company', u'Gas jeans', u'Chiuppano'],
                 u'Gay Blue Jeans Day': [u'LGBT rights by country or territory',
                                         u'Bowling Green State University',
                                         u'Gay Blue Jeans Day',
                                         u'National Coming Out Day',
                                         u'Gay rights',
                                         u'Same-sex relationship',
                                         u'Sexual orientation and military service',
                                         u'Violence against LGBT people',
                                         u'Immigration equality',
                                         u'World AIDS Day',
                                         u'LGBT rights organization',
                                         u'Legal aspects of transsexualism',
                                         u'Gay Pride Week',
                                         u'LGBT rights opposition',
                                         u'LGBT adoption',
                                         u'Same-sex marriage'],
                 u'Great Western Garment Co.': [u'Canadian',
                                                u'Industry',
                                                u'Edmonton',
                                                u'Bavaria',
                                                u'Great Western Garment Co.',
                                                u'Alexander Cameron Rutherford',
                                                u'World War II',
                                                u'Levi Strauss & Co.',
                                                u'Levi Strauss',
                                                u"Levi's Plaza",
                                                u'Allies of World War II',
                                                u'Chairman of the Board',
                                                u'Buttenheim',
                                                u'British Commonwealth',
                                                u'Stone washing',
                                                u'Dry goods',
                                                u'San Francisco',
                                                u'Alberta',
                                                u'World Wars'],
                 u'Hollister jeans': [u'SoCal',
                                      u'South Korea',
                                      u'Hong Kong',
                                      u'Abercrombie & Fitch Co.',
                                      u'Seagull',
                                      u'CEO',
                                      u'China',
                                      u'South Africa',
                                      u'Southern California',
                                      u'Mike Jeffries (CEO)',
                                      u'Abercrombie & Fitch',
                                      u'Sepia Tone',
                                      u'West Coast of the United States',
                                      u'Hollister jeans',
                                      u'New Albany, Ohio',
                                      u'Ireland',
                                      u'Piper Jaffray',
                                      u'United States dollar',
                                      u'Sweden',
                                      u'Lifestyle brand'],
                 u'Ikeda Jeans': [u'Tax evasion',
                                  u'Japanese people',
                                  u'Ikeda Jeans',
                                  u'Osaka',
                                  u'Ebisu (mythology)',
                                  u'Limited company',
                                  u'Hidehiko Yamane',
                                  u'Tokyo',
                                  u'Prosecutor',
                                  u'North America',
                                  u'Evisu'],
                 u'Iron Heart (Brand)': [u'Iron Heart (Brand)'],
                 u'Jagger (clothing)': [u'St. Petersburg',
                                        u'Prague',
                                        u'Jagger (disambiguation)',
                                        u'Kragujevac',
                                        u'Jagger (clothing)'],
                 u'Jean Machine': [u'American culture',
                                   u'Zurich',
                                   u'Kansas City, Missouri',
                                   u'Warp (weaving)',
                                   u'Montreal',
                                   u'Indigo',
                                   u'American Old West',
                                   u'Kate Winslet',
                                   u'Munich',
                                   u'Merriam, Kansas',
                                   u'Weft',
                                   u'Denim',
                                   u'Skinny jeans',
                                   u'Brussels',
                                   u'N\xeemes',
                                   u'Pacific Brands',
                                   u'Wrangler Jeans',
                                   u'Retailing',
                                   u'Frankfurt',
                                   u'Ontario',
                                   u'Asia',
                                   u'Silver Jeans',
                                   u'Twill',
                                   u'Work wear',
                                   u'Milano',
                                   u'Triple 5 Soul',
                                   u'Salina, Kansas',
                                   u'Copenhagen',
                                   u'Paris',
                                   u'Jacob Davis (inventor)',
                                   u'Sydney',
                                   u'\xc7erkezk\xf6y',
                                   u'Jeans',
                                   u'Adriana Lima',
                                   u'Winnipeg, Manitoba',
                                   u'Greaser (subculture)',
                                   u'Toronto',
                                   u'Australasia',
                                   u'President',
                                   u'Dungaree (fabric)',
                                   u'Guess (clothing)',
                                   u'Clothing',
                                   u'Casual',
                                   u'Jeans (disambiguation)',
                                   u'Chelsea Clinton',
                                   u'Cotton duck',
                                   u'Jean Machine',
                                   u'Mavi Jeans',
                                   u'Casual dress',
                                   u'Copper rivet',
                                   u'Lee (jeans)',
                                   u'South America',
                                   u'Tekirda\u011f Province',
                                   u'Denim (disambiguation)'],
                 u'Jeggings': [u'Jeggings', u'Leggings'],
                 u'Jordache': [u'Outerwear',
                               u'Designer jeans',
                               u'1980s in fashion',
                               u'Types of tennis match',
                               u'Shirt',
                               u'New York City',
                               u'Manufacture',
                               u'1970s in fashion',
                               u'Apparel',
                               u'Jordache'],
                 u'L.e.i. (clothing company)': [u'Retail store',
                                                u'Los Angeles, California',
                                                u'Wal-Mart',
                                                u'Company',
                                                u'L.e.i. (clothing company)'],
                 u'LA Denim Atelier': [u'Paris Hilton', u'Paul Marciano', u'LA Denim Atelier'],
                 u'Lee Cooper': [u'Lee Cooper (cricketer)',
                                 u'Lee Cooper',
                                 u'Sun Capital Partners'],
                 u'Lee National Denim Day': [u'Lee National Denim Day',
                                             u'Entertainment Industry Foundation',
                                             u'Lee Jeans'],
                 u'Legend World Wide': [u'Montenegro',
                                        u'Retail',
                                        u'Serbia',
                                        u'Private company',
                                        u'Subotica',
                                        u'Legend World Wide',
                                        u'Dealership',
                                        u'Wear',
                                        u'International',
                                        u'Franchises',
                                        u'Legend (disambiguation)'],
                 u'LittleBig': [u'Turkey',
                                u'Istanbul',
                                u'Europe',
                                u'Kazan',
                                u'Public company',
                                u'Zagreb',
                                u'Little, Big',
                                u'Dubai',
                                u'Rome',
                                u'Czech Republic',
                                u'Hungary',
                                u'Belgium',
                                u'New York',
                                u'Germany',
                                u'Johannesburg',
                                u'Netherlands',
                                u'Los Angeles',
                                u'New Jersey',
                                u'Anna Falchi',
                                u'France',
                                u'Textile',
                                u'Berlin',
                                u'Bucharest',
                                u'Poland',
                                u'Spain',
                                u'St.Petersburg',
                                u'Italy',
                                u'Ornella Muti',
                                u'United Kingdom',
                                u'Jeddah',
                                u'Cologne',
                                u'Tony Renis',
                                u'Moscow',
                                u'Russia',
                                u'Romania',
                                u'Austria',
                                u'LittleBig',
                                u'SOHO',
                                u'Croatia',
                                u'Types of business entity',
                                u'USA',
                                u'Amsterdam',
                                u'Riyadh'],
                 u'London Denim': [u'London Fashion Week', u'London Denim'],
                 u'Lucky Brand Jeans': [u'Canada',
                                        u"Bloomingdale's",
                                        u'Dillard\u2019s',
                                        u'Liz Claiborne',
                                        u'Macy\u2019s',
                                        u'United Arab Emirates',
                                        u'United States',
                                        u'Lord & Taylor',
                                        u'Holding company',
                                        u'Sportswear (fashion)',
                                        u'Sportswear (activewear)',
                                        u'U.S.',
                                        u'Nordstrom',
                                        u'Vernon, California',
                                        u'Buckle (store)',
                                        u'T-shirts',
                                        u'Puerto Rico',
                                        u'Belk',
                                        u'Lucky Brand Jeans'],
                 u'Menace jeans': [u'T-shirt',
                                   u'Pac Sun',
                                   u'Anchor Blue',
                                   u"Miller's Outpost",
                                   u'Fashion',
                                   u'Brand name',
                                   u'Menace jeans'],
                 u'Miss Sixty': [u'Footwear',
                                 u'Skirt',
                                 u'Top (clothing)',
                                 u'Handbag',
                                 u'Sweatshirt',
                                 u'Jacket',
                                 u'Miss Sixty',
                                 u'Italian fashion',
                                 u'Sunglasses',
                                 u'Shoe',
                                 u'Chieti'],
                 u'Mom jeans': [u'Terminology',
                                u'Belly button',
                                u'Saturday Night Live',
                                u"Mother's Day",
                                u'Pleat',
                                u'Barack Obama',
                                u'Mom jeans'],
                 u'Mudd Jeans': [u'Target Corporation',
                                 u'Marketing',
                                 u"Kohl's",
                                 u'Sears',
                                 u'Iconix Brand Group',
                                 u'JC Penney',
                                 u'The Bon-Ton',
                                 u'Mudd Jeans'],
                 u'Nix Jeans': [u'Nix Jeans'],
                 u'Noko Jeans': [u'Noko Jeans', u'North Korea'],
                 u'Nudie Jeans': [u'Swedish krona',
                                  u'J. Lindeberg',
                                  u'Gothenburg',
                                  u'Acne Jeans',
                                  u'Nudie Jeans',
                                  u'WESC'],
                 u'PRPS': [u'PRPS', u'Nike, Inc.', u'Donwan Harrell'],
                 u'Pepe Jeans': [u'Casual wear',
                                 u'Pepe Jeans',
                                 u'Sant Feliu de Llobregat',
                                 u'Portobello Road'],
                 u'Phat pants': [u'Phat pants'],
                 u'Pleated jeans': [u'Grunge rock',
                                    u'Pleated jeans',
                                    u'Gianni Versace S.p.A.',
                                    u'Sean John'],
                 u'Red & Black Jeans': [u'Kosovo', u'Red & Black Jeans'],
                 u'Sergio Valente (clothing)': [u'Seattle, Washington',
                                                u'Duda (Portuguese footballer)',
                                                u'Kent, Washington',
                                                u'Sergio Valente (clothing)',
                                                u'UK'],
                 u'Slim-fit pants': [u'Spandex',
                                     u'Capris',
                                     u'Zippers',
                                     u'Slim-fit pants',
                                     u'Tight trousers'],
                 u'Spiegel (catalog)': [u'Owner', u'Spiegel (catalog)'],
                 u"Studio d'Artisan": [u"Studio d'Artisan"],
                 u'SuperGroup plc': [u'SuperGroup plc',
                                     u'Earnings before interest and taxes',
                                     u'Cheltenham',
                                     u'Superdry',
                                     u'Peter Bamford',
                                     u'Chair (official)',
                                     u'Asahi Super Dry',
                                     u'London Stock Exchange',
                                     u'FTSE 250 Index',
                                     u'Ticker symbol'],
                 u'TB Jeans': [u'TB Jeans', u'Rain (entertainer)', u'MBLAQ', u'South Korean'],
                 u'Toughskins': [u'Corduroy',
                                 u'Sears, Roebuck and Co.',
                                 u'Denim jacket',
                                 u'Nylon',
                                 u'Cotton',
                                 u'Toughskins',
                                 u'Polyester'],
                 u'True Religion': [u'Asset',
                                    u'Net income',
                                    u'Manhattan Beach, California',
                                    u'True Religion',
                                    u'World',
                                    u'One true faith',
                                    u'United States Dollar',
                                    u'Chairman',
                                    u'Chief executive officer',
                                    u'NASDAQ',
                                    u'Equity (finance)'],
                 u'VB Rocks': [u'Rock and Republic', u'VB Rocks'],
                 u'Wide leg jeans': [u'Wide leg jeans', u'Ethnic'],
                 u'William Rast': [u'Justin Timberlake', u'William Rast'],
                 u'Wrangler (jeans)': [u'VF Corporation',
                                       u'JanSport',
                                       u'Lee (Jeans)',
                                       u'Wrangler (jeans)',
                                       u'Greensboro, North Carolina',
                                       u'The North Face']}

        # Run the clustering on the jeans fixture and compare the complete
        # output with the expected mapping above.
        self.assertEqual(cluster.main(["data/jeans.pkl"], True), result)
		print entropy
		print entropy_ori
		print entropy_reduction

		# split node
		if entropy_reduction > 0.005:

		 	# locate frontier in history and create new history
		 #	for i, (h, w) in enumerate(history):
		 #		if node == h:
		 #			del new_history[i]
	 
	 	#	history = history + [(phi1, w1), (phi2, w2)]


		# declare node terminal


if __name__ == '__main__':
	parser = argparse.ArgumentParser()
	parser.add_argument("train_text", help="file name of training text")
	args = parser.parse_args()
	train_text = args.train_text

	(encodings, f_bigram) = cluster.main()

	print "\nBit-Encoding Based Decision-Tree Language Model:"
	print "------------------------------------------------"

	bit_encoding(encodings, train_text, f_bigram)
Beispiel #10
0
        utils.print_option('spark', 'generates spark files and upload', hassub=True)
        utils.print_option('package', 'interact with package distribution', hassub=True)

        # utils.print_option('test')
        # utils.print_option('interact')
        # utils.print_option('rsync')
        utils.print_header("")

    if utils.inputMatch(['full']):
        setupFull()

    if utils.inputMatch(['kill']):
        cluster.instanceKillAll()

    if utils.inputMatch(['cluster'], doPartial=True):
        sys.argv = sys.argv[1:]
        cluster.main()

    if utils.inputMatch(['hadoop'], doPartial=True):
        sys.argv = sys.argv[1:]
        hadoop.main()

    if utils.inputMatch(['spark'], doPartial=True):
        sys.argv = sys.argv[1:]
        spark.main()

    if utils.inputMatch(['package'], doPartial=True):
        sys.argv = sys.argv[1:]
        package.main()