Esempio n. 1
0
	def save_result(self):
		'''Save the current classify result to a user-chosen pickle file.

		Opens a "save as" dialog (defaulting to ./data with a timestamped
		file name) and, when a path is chosen, asks the factor finder to
		persist itself there.  Reports success/failure in the output box.
		'''
		
		# ':' is not legal in Windows file names, so replace it in the stamp.
		now = str(datetime.now()).replace(':', '-')
		
		# BUG FIX: dropped the redundant 'file_opt = options' alias and the
		# dead trailing 'pass' of the original.
		options = {
			'defaultextension': '',
			'initialfile': 'ff_' + now[:19] + '.pickle',
			'filetypes': [('pickle file', '.pickle')],
			'initialdir': getcwd() + '\\data',
			'title': 'Save Result',
			'parent': self.parent,
		}
		
		# create dir if not exist
		if not path.exists(options['initialdir']):
			makedirs(options['initialdir'])
		
		file_name = tkFD.asksaveasfilename(**options)
		
		self.text_output.delete('1.0', END)
		if file_name:
			if self.FFF._factor_finder.save(file_name):
				self.text_output.insert(END, 'Saving result successful...' + '\n\n')
				util.debug('saved')
			else:
				self.text_output.insert(END, 'Saving result failed...' + '\n\n')
				util.debug('failed to save result')
Esempio n. 2
0
    def load_result(self):
        '''Load classify result.'''

        file_opt = options = {}
        options['defaultextension'] = ''
        options['filetypes'] = [('pickle file', '.pickle')]
        options['initialdir'] = getcwd() + '\\data'
        options['title'] = 'Load Result'
        options['parent'] = self.parent

        # create dir if not exist
        if not path.exists(options['initialdir']):
            makedirs(options['initialdir'])

        file_name = tkFD.askopenfilename(**file_opt)

        if file_name:
            print file_name
            flag, self.FFF._factor_finder = self.FFF._factor_finder.load(
                file_name)
            self.text_output.delete('1.0', END)

            # print to the output text
            if flag:
                util.debug('Loaded')
                self.text_output.insert(END, 'Result loaded...' + '\n\n')

            else:
                self.text_output.insert(END, 'Failed to load result...')
                util.debug('Failed to load result')
Esempio n. 3
0
	def load_result(self):
		'''Load classify result.'''
		
		file_opt = options = {}
		options['defaultextension'] = ''		
		options['filetypes'] = [('pickle file', '.pickle')]
		options['initialdir'] = getcwd() + '\\data'
		options['title'] = 'Load Result'
		options['parent'] = self.parent
		
		# create dir if not exist
		if not path.exists(options['initialdir']):
			makedirs(options['initialdir'])
		
		file_name = tkFD.askopenfilename(**file_opt)
		
		if file_name:
			print file_name
			flag, self.FFF._factor_finder = self.FFF._factor_finder.load(file_name)
			self.text_output.delete('1.0', END)
			
			# print to the output text
			if flag:
				util.debug('Loaded')
				self.text_output.insert(END, 'Result loaded...' + '\n\n')

			else:
				self.text_output.insert(END, 'Failed to load result...')
				util.debug('Failed to load result')
Esempio n. 4
0
    def save_classifier(self):
        '''Save the trained classifier to a user-chosen pickle file.'''

        # ':' is not legal in Windows file names, so strip it from the stamp.
        stamp = str(datetime.now()).replace(':', '-')
        data_dir = getcwd() + '\\data'

        # Create the default directory before showing the dialog.
        if not path.exists(data_dir):
            makedirs(data_dir)

        # Dialog options passed as keywords instead of an options dict.
        file_name = tkFD.asksaveasfilename(
            defaultextension='',
            initialfile='c_' + stamp[:19] + '.pickle',
            filetypes=[('pickle file', '.pickle')],
            initialdir=data_dir,
            title='Save classifier',
            parent=self.parent)

        self.text_output.delete('1.0', END)
        if file_name:
            saved = self.FFF._classifier.save(file_name)
            if saved:
                self.text_output.insert(
                    END, 'Saving classifier successful...' + '\n\n')
                util.debug('saved')
            else:
                self.text_output.insert(END,
                                        'Saving classifier failed...' + '\n\n')
                util.debug('failed to save classifier')
Esempio n. 5
0
	def extract_topic(self):
		'''Get when the breakpoints happened. Show the graph if it's selected.'''
		
		self.text_output.delete('1.0', END)
		
		# check if the data is not empty
		if self.FFF._factor_finder.list_tweet == None:
			self.text_output.insert(END, 'Failed to extract topic, list tweet is empty' + '\n')	

		else:
			# retrieve parameter			
			try:				
				# date
				start_time = datetime.strptime(self.ent_start_date.get() + ' ' + self.ent_start_time.get(), '%d-%m-%Y %H:%M:%S')
				end_time = datetime.strptime(self.ent_end_date.get() + ' ' + self.ent_end_time.get(), '%d-%m-%Y %H:%M:%S')
				duration_hour = int(self.ent_duration.get())
				decay_factor = float(self.ent_decay_factor.get())
				show_graph = bool(self.show_graph_val.get())
				
			except Exception, e:
				util.debug('retrieve parameter error')
				self.text_output.insert(END, 'Parameter error' + '\n')
			
			# to avoid long  variable name, just call it div_sent short of divide_sentiment, pffttt...
			div_sent = self.FFF._factor_finder.divide_sentiment_time(start_time, end_time, duration_hour, decay_factor)
			break_point = self.FFF._factor_finder.get_break_points()
			
			# print retval_divide_sentiment and retval_break_point to output text
			self.text_output.insert(END, 'Topic Extraction : ' + '\n\n')
			
			# topic extraction
			# topics = self.FFF._factor_finder.get_break_point_topics()
			topics_pos = self.FFF._factor_finder.get_all_topics(5, decay_factor, 1)
			topics_neg = self.FFF._factor_finder.get_all_topics(5, decay_factor, -1)
			
			self.text_output.insert(END, 'Topic Extraction parameters : ' + '\n')
			datetime.now().strftime('%d-%m-%Y')
			self.text_output.insert(END, 'Start time : ' + str(start_time) + '\n')
			self.text_output.insert(END, 'End time : ' + str(end_time) + '\n')
			self.text_output.insert(END, 'Duration : ' + str(duration_hour) + '\n')
			self.text_output.insert(END, 'Discounted Cumulative Factor : ' + str(decay_factor) + '\n\n')
		
			self.text_output.insert(END, 'No\tStart time\t\t     Num\t\tSentiment\tKumulatif\n')
			i = 0
			for idx in div_sent:
				#self.text_output.insert(END, str(i + 1) + '\t' + str(div_sent[idx]['start_time']) + '\t' + str(div_sent[idx]['end_time'])[:13] + '\t' + str(len(div_sent[idx]['list_tweet'])) + '\t\t' + str(div_sent[idx]['sentiment'])[:6] + '\t' + str(div_sent[idx]['cum_sentiment'])[:6] +'\n')
				self.text_output.insert(END, str(i + 1) + '\t' + str(div_sent[idx]['start_time']) + '\t\t' + str(len(div_sent[idx]['list_tweet'])) + '\t\t' + str(div_sent[idx]['sentiment'])[:6] + '\t' + str(div_sent[idx]['cum_sentiment'])[:6] +'\n')
				self.text_output.insert(END, '\tPositif Topics :\t'+ ',  '.join(topics_pos[idx]) + '\n')
				self.text_output.insert(END, '\tNegatif Topics :\t'+ ',  '.join(topics_neg[idx]) + '\n')
				i += 1
			
			
			# show graph or not
			if show_graph:
				self.text_output.insert(END, '\nShowing graph' + '\n')
				self.text_output.insert(END, 'Close pop up to continue' + '\n')	
				self.FFF._factor_finder.plot_graph()
Esempio n. 6
0
def test(model, fusion_datasets, configs, load_weight_path=False, save_path=None):
    """Run the fusion model over a test set and save fused images as JPEGs.

    Args:
        model: fusion network; called with a dict of sensor tensors.
        fusion_datasets: dataset yielding {sensor_name: tensor} samples.
        configs: parsed config dict (TEST / MODEL / TEST_DATASET sections).
        load_weight_path: when True, restore weights from
            configs['TEST']['weight_path'] before testing.
        save_path: output directory; defaults to configs['TEST']['save_path'].
    """
    model.eval()

    if load_weight_path:
        assert configs['TEST']['weight_path'] != 'None', 'Test Need To Resume Chekpoint'
        weight_path = configs['TEST']['weight_path']
        # map_location keeps the load working on CPU-only machines; the
        # model is moved to the GPU below when one is available.
        checkpoint = torch.load(weight_path, map_location='cpu')
        model.load_state_dict(checkpoint['model'].state_dict())
    is_use_gpu = torch.cuda.is_available()

    # Resolve the output directory once instead of once per image.
    if save_path is None:
        save_path = configs['TEST']['save_path']

    test_dataloader = DataLoader(fusion_datasets, batch_size=configs['TEST']['batch_size'], shuffle=False)
    test_num_iter = len(test_dataloader)
    dtransforms = transforms.Compose([transforms.ToPILImage()])

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        with tqdm(total=test_num_iter) as test_bar:
            for data in test_dataloader:

                if is_use_gpu:
                    model = model.cuda()
                    data = {sensor: data[sensor].cuda() for sensor in data}

                fusion_image = model(data)

                # Tile the input images and the fused output side by side
                # (concatenation along the width axis).
                input_imgs, fusion_imgs = debug(configs['MODEL'], configs['TEST_DATASET'], data, fusion_image)
                input_imgs = [input_imgs[sensor] for sensor in configs['MODEL']['input_sensors']]
                imgs = torch.cat(input_imgs + [fusion_imgs], dim=3)
                for batch in range(imgs.shape[0]):
                    # File name = current number of files already in save_path.
                    name = os.path.join(save_path, str(len(os.listdir(save_path))))
                    img = dtransforms(imgs[batch].cpu())
                    img.save(f'{name}.jpg')
                test_bar.update(1)
Esempio n. 7
0
	def load_classifier(self):
		'''Load classifier.'''
		
		file_opt = options = {}
		options['defaultextension'] = ''		
		options['filetypes'] = [('pickle file', '.pickle')]
		options['initialdir'] = getcwd() + '\\data'
		options['title'] = 'Load classifier'
		options['parent'] = self.parent
		
		# create dir if not exist
		if not path.exists(options['initialdir']):
			makedirs(options['initialdir'])
		
		file_name = tkFD.askopenfilename(**file_opt)
		
		if file_name:
			print file_name
			flag, self.FFF._classifier = self.FFF._classifier.load(file_name)
			self.text_output.delete('1.0', END)
			
			# print to the output text
			if flag:
				util.debug('Loaded')
				self.text_output.insert(END, 'Classifier loaded...' + '\n\n')
				
				self.text_output.insert(END, 'Preprocess Parameter : ' + '\n')
				self.text_output.insert(END, 'Fold Case : ' + str(bool(self.FFF._classifier.dict_param['fold_case']))  + '\n')
				self.text_output.insert(END, 'Remove RT : ' + str(bool(self.FFF._classifier.dict_param['remove_RT'])) +'\n')
				self.text_output.insert(END, 'Remove Hashtag : ' + str(bool(self.FFF._classifier.dict_param['remove_hashtag'])) + '\n')
				self.text_output.insert(END, 'Remove Username : '******'remove_username']))  + '\n')
				self.text_output.insert(END, 'Convert Number : ' + str(bool(self.FFF._classifier.dict_param['convert_number'])) + '\n')
				self.text_output.insert(END, 'Clean Number : ' + str(bool(self.FFF._classifier.dict_param['clean_number'])) + '\n')
				self.text_output.insert(END, 'Convert Emoticon : ' + str(bool(self.FFF._classifier.dict_param['convert_emoticon'])) + '\n')
				self.text_output.insert(END, 'Remove Punctuation : ' + str(bool(self.FFF._classifier.dict_param['remove_punctuation_string'])) + '\n')
				self.text_output.insert(END, 'Convert Word : ' + str(bool(self.FFF._classifier.dict_param['convert_word'])) + '\n')
				self.text_output.insert(END, 'Remove Stop Word : ' + str(bool(self.FFF._classifier.dict_param['remove_stop_words'])) + '\n')
				self.text_output.insert(END, 'Convert Negation : ' + str(bool(self.FFF._classifier.dict_param['convert_negation'])) + '\n\n')
				
				self.text_output.insert(END, 'Minimal Occur : ' + str(self.FFF._classifier.min_occur) + '\n')

			else:
				self.text_output.insert(END, 'Failed to load classifier...')
				util.debug('Failed to load')
Esempio n. 8
0
def train(model, train_datasets, test_datasets, configs):
    """Train the fusion model, logging loss/images/lr to TensorBoard and
    checkpointing periodically.

    Args:
        model: fusion network; called with a dict of sensor tensors.
        train_datasets: training dataset wrapped in a DataLoader below.
        test_datasets: not referenced in this function (kept for interface
            symmetry with the caller).
        configs: parsed config dict (PROJECT / TRAIN / MODEL sections).
    """
    # Create <save_path>/<name> for logs and checkpoints if missing.
    if not os.path.exists(
            os.path.join(configs['PROJECT']['save_path'],
                         configs['PROJECT']['name'])):
        os.mkdir(
            os.path.join(configs['PROJECT']['save_path'],
                         configs['PROJECT']['name']))

    model.train()

    train_writer = SummaryWriter(log_dir=os.path.join(
        configs['PROJECT']['save_path'], configs['PROJECT']['name']))

    # Resume the epoch counter from a checkpoint when configured.
    if configs['TRAIN']['resume'] == 'None':
        start_epoch = 1
    else:
        start_epoch = torch.load(configs['TRAIN']['resume'])['epoch'] + 1

    is_use_gpu = torch.cuda.is_available()

    # NOTE(review): optimizer class and loss functions are instantiated via
    # eval() on config strings -- only safe when the config file is trusted.
    optimizer = eval('torch.optim.' + configs['TRAIN']['opt'])(
        model.parameters(), configs['TRAIN']['lr'])
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=configs['TRAIN']['milestones'],
        gamma=configs['TRAIN']['gamma'])

    train_dataloader = DataLoader(train_datasets,
                                  batch_size=configs['TRAIN']['batch_size'],
                                  shuffle=True)
    train_num_iter = len(train_dataloader)

    loss_func = [eval(l)() for l in configs['TRAIN']['loss_func']]

    all_iter = 0
    for epoch in range(start_epoch, configs['TRAIN']['max_epoch'] + 1):

        loss_epoch = 0

        with tqdm(total=train_num_iter) as train_bar:
            # NOTE(review): 'iter' shadows the builtin; left as-is.
            for iter, data in enumerate(train_dataloader):

                if is_use_gpu:
                    model = model.cuda()
                    data = {sensor: data[sensor].cuda() for sensor in data}

                fusion_image = model(data)

                # Weighted sum of the configured loss terms.
                loss = [
                    l(data, fusion_image) *
                    configs['TRAIN']['loss_weights'][loss_func.index(l)]
                    for l in loss_func
                ]

                loss_batch = sum(loss)

                loss_epoch += loss_batch.item()
                optimizer.zero_grad()
                loss_batch.backward()
                optimizer.step()

                train_writer.add_scalar('loss',
                                        loss_batch,
                                        global_step=all_iter)
                train_bar.set_description(
                    'Epoch: {}/{}. TRAIN. Iter: {}/{}. All loss: {:.5f}'.
                    format(epoch, configs['TRAIN']['max_epoch'], iter + 1,
                           train_num_iter, loss_epoch / train_num_iter))
                # Periodically log a side-by-side image strip (inputs +
                # fused output) for visual inspection.
                if configs['TRAIN'][
                        'debug_interval'] is not None and all_iter % configs[
                            'TRAIN']['debug_interval'] == 0:
                    input_imgs, fusion_imgs = debug(configs['MODEL'],
                                                    configs['TRAIN_DATASET'],
                                                    data, fusion_image)
                    input_imgs = [
                        input_imgs[sensor]
                        for sensor in configs['MODEL']['input_sensors']
                    ]
                    imgs = input_imgs + [fusion_imgs]
                    train_writer.add_image('debug',
                                           torch.cat(imgs, dim=2),
                                           all_iter,
                                           dataformats='NCHW')

                all_iter += 1
                train_bar.update(1)

            scheduler.step()

            train_writer.add_scalar(
                'lr',
                optimizer.state_dict()['param_groups'][0]['lr'],
                global_step=epoch)

            # Checkpoint on the configured interval.  NOTE(review): the
            # condition tests all_iter (global iteration count), not epoch --
            # confirm this matches the intended 'val_interval' semantics.
            if configs['TRAIN'][
                    'val_interval'] is not None and all_iter % configs[
                        'TRAIN']['val_interval'] == 0:
                torch.save({
                    'model': model,
                    'epoch': epoch
                },
                           os.path.join(configs['PROJECT']['save_path'],
                                        configs['PROJECT']['name'],
                                        f'model_{epoch}.pth'))
Esempio n. 9
0
	def classify(self):
		'''Classify some tweets according to the current classifier.'''
		
		self.text_output.delete('1.0', END)
		
		# checking whether the classifier has been trained or not
		if not self.FFF._classifier.trained:
			self.text_output.insert(END, 'Gagal mengklasifikasikan, belum ditraining' + '\n')	
			
		else:
			try:
				num_tweet = int(self.ent_num_tweet_classify.get())
				random_seed = int(self.ent_random_seed_classify.get())
				keyword = self.ent_keyword_classify.get()
				
				# date
				start_time = datetime.strptime(self.ent_start_date.get() + ' ' + self.ent_start_time.get(), '%d-%m-%Y %H:%M:%S')
				end_time = datetime.strptime(self.ent_end_date.get() + ' ' + self.ent_end_time.get(), '%d-%m-%Y %H:%M:%S')

			except Exception, e:
				raise e
				debug(str(e))
			
			test_data = tm.get_test_data(keyword, start_time, end_time)
			if random_seed != 0:
				random.seed(random_seed)
				random.shuffle(test_data)
			list_tweet = self.FFF.classify_tweets(test_data, keyword, num_tweet)
			
			# print to the output text
			self.text_output.delete('1.0', END)
			self.text_output.insert(END, 'Hasil Klasifikasi' + '\n\n')
			self.text_output.insert(END, 'Preprocess Parameter : ' + '\n\n')
			self.text_output.insert(END, 'Fold Case : ' + str(bool(self.FFF._classifier.dict_param['fold_case']))  + '\n')
			self.text_output.insert(END, 'Remove RT : ' + str(bool(self.FFF._classifier.dict_param['remove_RT'])) +'\n')
			self.text_output.insert(END, 'Remove Hashtag : ' + str(bool(self.FFF._classifier.dict_param['remove_hashtag'])) + '\n')
			self.text_output.insert(END, 'Remove Username : '******'remove_username']))  + '\n')
			self.text_output.insert(END, 'Convert Number : ' + str(bool(self.FFF._classifier.dict_param['convert_number'])) + '\n')
			self.text_output.insert(END, 'Clean Number : ' + str(bool(self.FFF._classifier.dict_param['clean_number'])) + '\n')
			self.text_output.insert(END, 'Convert Emoticon : ' + str(bool(self.FFF._classifier.dict_param['convert_emoticon'])) + '\n')
			self.text_output.insert(END, 'Remove Punctuation : ' + str(bool(self.FFF._classifier.dict_param['remove_punctuation_string'])) + '\n')
			self.text_output.insert(END, 'Convert Word : ' + str(bool(self.FFF._classifier.dict_param['convert_word'])) + '\n')
			self.text_output.insert(END, 'Remove Stop Word : ' + str(bool(self.FFF._classifier.dict_param['remove_stop_words'])) + '\n')
			self.text_output.insert(END, 'Convert Negation : ' + str(bool(self.FFF._classifier.dict_param['convert_negation'])) + '\n\n')
			
			self.text_output.insert(END, 'Minimal Kemunculan : ' + str(self.FFF._classifier.min_occur) + '\n\n')
			
			self.text_output.insert(END, 'Keyword : ' + keyword + '\n')
			self.text_output.insert(END, 'Banyak Tweet : ' + str(self.FFF._classifier.num_tweet_classified) + '\n')
			self.text_output.insert(END, 'Random Seed : ' + str(random_seed) + '\n')
			
			
			# show result
			self.show_tweet = self.show_tweet_val.get()
			list_sentiment = []
			if self.show_tweet == 1:
				for t in list_tweet:
					self.text_output.insert(END, 'Tweet : ' + str(t.get_normal_text()) + '\n')
					self.text_output.insert(END, 'Sentiment : ' + str(t.sentiment) + '\n\n')
					list_sentiment.append(t.sentiment)
			else:
				for t in list_tweet:
					list_sentiment.append(t.sentiment)

			self.text_output.insert(END, '\n')
			self.text_output.insert(END, 'Result : \n')
			self.text_output.insert(END, 'Positif :'+ str(list_sentiment.count(1)) + ' \n')
			self.text_output.insert(END, 'Netral :'+ str(list_sentiment.count(0)) + ' \n')
			self.text_output.insert(END, 'Negatif :'+ str(list_sentiment.count(-1)) + ' \n')
			
			# this lontong function
			from xlwt import Workbook
			from tempfile import TemporaryFile
			
			book = Workbook()
			
			try:
				activeSheet = book.add_sheet(str('fuuu'))
					
				i = 1
				activeSheet.write(i, 0, 'No')
				activeSheet.write(i, 1, 'Created')
				activeSheet.write(i, 2, 'Text')
				activeSheet.write(i, 3, 'Sentiment')
				
				i += 1
				
				for tweet in list_tweet:
					activeSheet.write(i, 0, str(i - 1))
					activeSheet.write(i, 1, tweet.time.__str__())
					activeSheet.write(i, 2, str(tweet.get_normal_text()))
					activeSheet.write(i, 3, tweet.sentiment)
					
					i += 1
				pret = str(start_time).replace(':', '-')
				book.save('lontong' + pret+ '.xls')
				book.save(TemporaryFile())
			
			except Exception, e:
				util.debug(str(e))
Esempio n. 10
0
    def load_classifier(self):
        '''Load classifier.'''

        file_opt = options = {}
        options['defaultextension'] = ''
        options['filetypes'] = [('pickle file', '.pickle')]
        options['initialdir'] = getcwd() + '\\data'
        options['title'] = 'Load classifier'
        options['parent'] = self.parent

        # create dir if not exist
        if not path.exists(options['initialdir']):
            makedirs(options['initialdir'])

        file_name = tkFD.askopenfilename(**file_opt)

        if file_name:
            print file_name
            flag, self.FFF._classifier = self.FFF._classifier.load(file_name)
            self.text_output.delete('1.0', END)

            # print to the output text
            if flag:
                util.debug('Loaded')
                self.text_output.insert(END, 'Classifier loaded...' + '\n\n')

                self.text_output.insert(END, 'Preprocess Parameter : ' + '\n')
                self.text_output.insert(
                    END, 'Fold Case : ' +
                    str(bool(self.FFF._classifier.dict_param['fold_case'])) +
                    '\n')
                self.text_output.insert(
                    END, 'Remove RT : ' +
                    str(bool(self.FFF._classifier.dict_param['remove_RT'])) +
                    '\n')
                self.text_output.insert(
                    END, 'Remove Hashtag : ' +
                    str(bool(
                        self.FFF._classifier.dict_param['remove_hashtag'])) +
                    '\n')
                self.text_output.insert(
                    END, 'Remove Username : '******'remove_username']
                             )) + '\n')
                self.text_output.insert(
                    END, 'Convert Number : ' +
                    str(bool(
                        self.FFF._classifier.dict_param['convert_number'])) +
                    '\n')
                self.text_output.insert(
                    END, 'Clean Number : ' +
                    str(bool(self.FFF._classifier.dict_param['clean_number']))
                    + '\n')
                self.text_output.insert(
                    END, 'Convert Emoticon : ' + str(
                        bool(self.FFF._classifier.
                             dict_param['convert_emoticon'])) + '\n')
                self.text_output.insert(
                    END, 'Remove Punctuation : ' + str(
                        bool(self.FFF._classifier.
                             dict_param['remove_punctuation_string'])) + '\n')
                self.text_output.insert(
                    END, 'Convert Word : ' +
                    str(bool(self.FFF._classifier.dict_param['convert_word']))
                    + '\n')
                self.text_output.insert(
                    END, 'Remove Stop Word : ' + str(
                        bool(self.FFF._classifier.
                             dict_param['remove_stop_words'])) + '\n')
                self.text_output.insert(
                    END, 'Convert Negation : ' + str(
                        bool(self.FFF._classifier.
                             dict_param['convert_negation'])) + '\n\n')

                self.text_output.insert(
                    END, 'Minimal Occur : ' +
                    str(self.FFF._classifier.min_occur) + '\n')

            else:
                self.text_output.insert(END, 'Failed to load classifier...')
                util.debug('Failed to load')
Esempio n. 11
0
    def extract_topic(self):
        '''Get when the breakpoints happened. Show the graph if it's selected.'''

        self.text_output.delete('1.0', END)

        # check if the data is not empty
        if self.FFF._factor_finder.list_tweet == None:
            self.text_output.insert(
                END, 'Failed to extract topic, list tweet is empty' + '\n')

        else:
            # retrieve parameter
            try:
                # date
                start_time = datetime.strptime(
                    self.ent_start_date.get() + ' ' +
                    self.ent_start_time.get(), '%d-%m-%Y %H:%M:%S')
                end_time = datetime.strptime(
                    self.ent_end_date.get() + ' ' + self.ent_end_time.get(),
                    '%d-%m-%Y %H:%M:%S')
                duration_hour = int(self.ent_duration.get())
                decay_factor = float(self.ent_decay_factor.get())
                show_graph = bool(self.show_graph_val.get())

            except Exception, e:
                util.debug('retrieve parameter error')
                self.text_output.insert(END, 'Parameter error' + '\n')

            # to avoid long  variable name, just call it div_sent short of divide_sentiment, pffttt...
            div_sent = self.FFF._factor_finder.divide_sentiment_time(
                start_time, end_time, duration_hour, decay_factor)
            break_point = self.FFF._factor_finder.get_break_points()

            # print retval_divide_sentiment and retval_break_point to output text
            self.text_output.insert(END, 'Topic Extraction : ' + '\n\n')

            # topic extraction
            # topics = self.FFF._factor_finder.get_break_point_topics()
            topics_pos = self.FFF._factor_finder.get_all_topics(
                5, decay_factor, 1)
            topics_neg = self.FFF._factor_finder.get_all_topics(
                5, decay_factor, -1)

            self.text_output.insert(END,
                                    'Topic Extraction parameters : ' + '\n')
            datetime.now().strftime('%d-%m-%Y')
            self.text_output.insert(END,
                                    'Start time : ' + str(start_time) + '\n')
            self.text_output.insert(END, 'End time : ' + str(end_time) + '\n')
            self.text_output.insert(END,
                                    'Duration : ' + str(duration_hour) + '\n')
            self.text_output.insert(
                END,
                'Discounted Cumulative Factor : ' + str(decay_factor) + '\n\n')

            self.text_output.insert(
                END, 'No\tStart time\t\t     Num\t\tSentiment\tKumulatif\n')
            i = 0
            for idx in div_sent:
                #self.text_output.insert(END, str(i + 1) + '\t' + str(div_sent[idx]['start_time']) + '\t' + str(div_sent[idx]['end_time'])[:13] + '\t' + str(len(div_sent[idx]['list_tweet'])) + '\t\t' + str(div_sent[idx]['sentiment'])[:6] + '\t' + str(div_sent[idx]['cum_sentiment'])[:6] +'\n')
                self.text_output.insert(
                    END,
                    str(i + 1) + '\t' + str(div_sent[idx]['start_time']) +
                    '\t\t' + str(len(div_sent[idx]['list_tweet'])) + '\t\t' +
                    str(div_sent[idx]['sentiment'])[:6] + '\t' +
                    str(div_sent[idx]['cum_sentiment'])[:6] + '\n')
                self.text_output.insert(
                    END, '\tPositif Topics :\t' + ',  '.join(topics_pos[idx]) +
                    '\n')
                self.text_output.insert(
                    END, '\tNegatif Topics :\t' + ',  '.join(topics_neg[idx]) +
                    '\n')
                i += 1

            # show graph or not
            if show_graph:
                self.text_output.insert(END, '\nShowing graph' + '\n')
                self.text_output.insert(END, 'Close pop up to continue' + '\n')
                self.FFF._factor_finder.plot_graph()
Esempio n. 12
0
    def classify(self):
        '''Classify some tweets according to the current classifier.'''

        self.text_output.delete('1.0', END)

        # checking whether the classifier has been trained or not
        if not self.FFF._classifier.trained:
            self.text_output.insert(
                END, 'Gagal mengklasifikasikan, belum ditraining' + '\n')

        else:
            try:
                num_tweet = int(self.ent_num_tweet_classify.get())
                random_seed = int(self.ent_random_seed_classify.get())
                keyword = self.ent_keyword_classify.get()

                # date
                start_time = datetime.strptime(
                    self.ent_start_date.get() + ' ' +
                    self.ent_start_time.get(), '%d-%m-%Y %H:%M:%S')
                end_time = datetime.strptime(
                    self.ent_end_date.get() + ' ' + self.ent_end_time.get(),
                    '%d-%m-%Y %H:%M:%S')

            except Exception, e:
                raise e
                debug(str(e))

            test_data = tm.get_test_data(keyword, start_time, end_time)
            if random_seed != 0:
                random.seed(random_seed)
                random.shuffle(test_data)
            list_tweet = self.FFF.classify_tweets(test_data, keyword,
                                                  num_tweet)

            # print to the output text
            self.text_output.delete('1.0', END)
            self.text_output.insert(END, 'Hasil Klasifikasi' + '\n\n')
            self.text_output.insert(END, 'Preprocess Parameter : ' + '\n\n')
            self.text_output.insert(
                END, 'Fold Case : ' +
                str(bool(self.FFF._classifier.dict_param['fold_case'])) + '\n')
            self.text_output.insert(
                END, 'Remove RT : ' +
                str(bool(self.FFF._classifier.dict_param['remove_RT'])) + '\n')
            self.text_output.insert(
                END, 'Remove Hashtag : ' +
                str(bool(self.FFF._classifier.dict_param['remove_hashtag'])) +
                '\n')
            self.text_output.insert(
                END, 'Remove Username : '******'remove_username'])) +
                '\n')
            self.text_output.insert(
                END, 'Convert Number : ' +
                str(bool(self.FFF._classifier.dict_param['convert_number'])) +
                '\n')
            self.text_output.insert(
                END, 'Clean Number : ' +
                str(bool(self.FFF._classifier.dict_param['clean_number'])) +
                '\n')
            self.text_output.insert(
                END, 'Convert Emoticon : ' +
                str(bool(self.FFF._classifier.dict_param['convert_emoticon']))
                + '\n')
            self.text_output.insert(
                END, 'Remove Punctuation : ' + str(
                    bool(self.FFF._classifier.
                         dict_param['remove_punctuation_string'])) + '\n')
            self.text_output.insert(
                END, 'Convert Word : ' +
                str(bool(self.FFF._classifier.dict_param['convert_word'])) +
                '\n')
            self.text_output.insert(
                END, 'Remove Stop Word : ' +
                str(bool(self.FFF._classifier.dict_param['remove_stop_words']))
                + '\n')
            self.text_output.insert(
                END, 'Convert Negation : ' +
                str(bool(self.FFF._classifier.dict_param['convert_negation']))
                + '\n\n')

            self.text_output.insert(
                END, 'Minimal Kemunculan : ' +
                str(self.FFF._classifier.min_occur) + '\n\n')

            self.text_output.insert(END, 'Keyword : ' + keyword + '\n')
            self.text_output.insert(
                END, 'Banyak Tweet : ' +
                str(self.FFF._classifier.num_tweet_classified) + '\n')
            self.text_output.insert(END,
                                    'Random Seed : ' + str(random_seed) + '\n')

            # show result
            self.show_tweet = self.show_tweet_val.get()
            list_sentiment = []
            if self.show_tweet == 1:
                for t in list_tweet:
                    self.text_output.insert(
                        END, 'Tweet : ' + str(t.get_normal_text()) + '\n')
                    self.text_output.insert(
                        END, 'Sentiment : ' + str(t.sentiment) + '\n\n')
                    list_sentiment.append(t.sentiment)
            else:
                for t in list_tweet:
                    list_sentiment.append(t.sentiment)

            self.text_output.insert(END, '\n')
            self.text_output.insert(END, 'Result : \n')
            self.text_output.insert(
                END, 'Positif :' + str(list_sentiment.count(1)) + ' \n')
            self.text_output.insert(
                END, 'Netral :' + str(list_sentiment.count(0)) + ' \n')
            self.text_output.insert(
                END, 'Negatif :' + str(list_sentiment.count(-1)) + ' \n')

            # this lontong function
            from xlwt import Workbook
            from tempfile import TemporaryFile

            book = Workbook()

            try:
                activeSheet = book.add_sheet(str('fuuu'))

                i = 1
                activeSheet.write(i, 0, 'No')
                activeSheet.write(i, 1, 'Created')
                activeSheet.write(i, 2, 'Text')
                activeSheet.write(i, 3, 'Sentiment')

                i += 1

                for tweet in list_tweet:
                    activeSheet.write(i, 0, str(i - 1))
                    activeSheet.write(i, 1, tweet.time.__str__())
                    activeSheet.write(i, 2, str(tweet.get_normal_text()))
                    activeSheet.write(i, 3, tweet.sentiment)

                    i += 1
                pret = str(start_time).replace(':', '-')
                book.save('lontong' + pret + '.xls')
                book.save(TemporaryFile())

            except Exception, e:
                util.debug(str(e))