def parse_comment_files(comment_file):
    """Parse one comment file into a list of sentiment-tagged records.

    The file name (directory part dropped) is split on ``_``: the first
    piece is used as the game id and the second as the comment source.
    Every line of the file is expected to look like ``commenter::comment``.

    Args:
        comment_file: Path of the comment file to read.

    Returns:
        A list with one dict per line of the file, in file order. Each dict
        has the keys ``id`` (``<game_id>_<line index>``), ``game_id``,
        ``comment_order`` (zero-based line index), ``commenter``,
        ``comment``, ``source`` and ``sentiment`` (whatever
        ``SentimentAnalyser.classify`` returns for the comment text).

    Raises:
        IndexError: If the file name contains no ``_`` or a line contains
            no ``::`` separator.
    """
    logger.debug(comment_file)

    _, file_name = os.path.split(comment_file)
    file_name_split = file_name.split('_')
    game_id = file_name_split[0]
    source = file_name_split[1]
    logger.debug('Processing comment files for game: ' + str(game_id))

    records = []
    with open(comment_file, 'r') as comment_in:
        for index, line in enumerate(comment_in):
            # Split on the first '::' only, so a comment that itself
            # contains '::' is kept whole instead of being cut short.
            split_line = line.split('::', 1)
            commenter = split_line[0]
            # NOTE: the line's trailing newline (if any) is kept as part of
            # the comment text; it is not stripped here.
            comment = split_line[1]
            records.append({
                'id': str(game_id) + '_' + str(index),
                'game_id': game_id,
                'comment_order': index,
                'commenter': commenter,
                'comment': comment,
                'source': source,
                'sentiment': SentimentAnalyser.classify(comment),
            })
    return records
import SentimentAnalyser as sa

# Run the classifier on a fixed sample string and print whatever it returns.
_sample_sentiment = sa.classify("ajshjsahf")
print(_sample_sentiment)
try: mydb = mysql.connector.connect(host="localhost", user="******", passwd="", database="SentiStock") mycursor = mydb.cursor() sql = "INSERT INTO SS_News(ss_news_id,ss_source, ss_link, ss_time,ss_entry_time, ss_title, ss_image_link, ss_description, ss_sentiments, ss_symbol, ss_category,ss_full_description) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)" for i in range(0, len(out_df)): sent = 'NA' symbol = 'NA' description = str(out_df['description'][i]).replace("#39", "'").strip() if out_df['title'][i] != None and out_df['title'][i] != ' ': sent = sa.classify(out_df['title'][i]) symbol = se.ExtractSymbol(out_df['title'][i]) if symbol == '': symbol = 'NA' values = (str(out_df['titleDigest'][i]), str(out_df['source'][i]), str(out_df['link'][i]), str(out_df['date'][i]), getCurrentDateTime(), out_df['title'][i], str(out_df['imagesrc'][i]), str(out_df['description'][i]), sent, symbol, str(out_df['category'][i]), str(out_df['description'][i])) mycursor.execute(sql, values) mydb.commit() count += 1 print(count, "record inserted.") # ClubbSimilar(out_df['title'][i], out_df['titleDigest'][i])
ts = pd.Timestamp(year=int(temp1[0]), month=int(temp1[1]), day=int( temp1[2]), hour=int(temp2[0]), minute=int(temp2[1]), second=int(temp2[2]), tz='utc') ts = ts.to_julian_date() cts = pd.Timestamp(year=1990, month=1, day=1, hour=0, minute=0, second=0, tz='utc') cts = cts.now() - pd.Timedelta('1 day') cts = int(cts.to_julian_date()) if(ts < cts): flag = 1 else: flag = 0 elif child.tag == 'image': imagesrc = child.text if child is not None else None if(flag == 1): continue sent = sa.classify(title) if title is not None else None symbol = se.ExtractSymbol(title)if title is not None else None if symbol == '': symbol = 'NA' print(symbol) if(flag == 0): out_df = out_df.append(pd.Series( [title, description, source, link, date, imagesrc, sent, symbol, category], index=df_cols), ignore_index=True) mydb = mysql.connector.connect(host="192.168.2.89", user="******", passwd="uatmysql", database="PythonNews") mycursor = mydb.cursor() <<<<<<< HEAD