Exemple #1
0
 def set_values_to_db(self):
     """Load every translation file into the translator table.

     Each path returned by ``self.get_json_files()`` is parsed as
     tab-separated text without a header row. For every row, column 0 is
     taken as the untranslated text and column 1 as its translation; the
     pair is stored with source language ``"en"`` and the target language
     returned by ``self.__find_language`` for that file. A pair that
     ``self.__find_translation`` already knows is updated, otherwise it is
     inserted.

     The most recently parsed frame is kept on ``self.dataset``.
     """
     delimiter = "\t"
     for json_file in self.get_json_files():
         print_green(json_file)
         translate_language = self.__find_language(json_file)
         print_cyan(f"translate language => {translate_language}")
         # Rows may have differing field counts, so the frame is sized to
         # the widest row. The width has to be measured with the same
         # delimiter the parser uses; counting commas yields a number that
         # is unrelated to the tab-separated layout.
         with open(json_file, 'r') as temp_f:
             col_count = [len(line.split(delimiter)) for line in temp_f]
         if not col_count:
             # max() of an empty sequence raises ValueError.
             print_yellow(f"empty file, skipping => {json_file}")
             continue
         column_names = list(range(max(col_count)))
         # NOTE(review): ``error_bad_lines`` was deprecated in pandas 1.3
         # and removed in 2.0 (replacement: ``on_bad_lines="skip"``).
         # Kept as-is because the pandas version in use is not visible
         # from here - confirm and switch if the project is on >= 1.3.
         dataset = pd.read_csv(json_file,
                               header=None,
                               delimiter=delimiter,
                               names=column_names,
                               error_bad_lines=False)
         self.dataset = dataset
         for index, data in dataset.iterrows():
             print_yellow(f"index => {index}")
             values = list(data)
             # Only column 1 triggers a write; rows without it are skipped.
             if len(values) < 2 or pd.isnull(values[1]):
                 continue
             # A missing source text is passed on as None, as before.
             non_translate = None if pd.isnull(values[0]) else values[0]
             translate = values[1]
             translate_id = self.__find_translation(translate,
                                                    non_translate)
             print_gray(f"translate_id => {translate_id}")
             if translate_id:
                 self.__update_translation(translate, non_translate, "en",
                                           translate_language,
                                           translate_id)
             else:
                 self.__set_translation(translate, non_translate, "en",
                                        translate_language)
Exemple #2
0
 def get_participants(self, participant_data):
     """Build a lookup from gaia id to converted fallback name.

     Every entry of ``participant_data`` must provide ``["id"]["gaia_id"]``
     and ``["fallback_name"]``. The name is passed through
     ``self.convert_encoding`` before it is stored. When the same gaia id
     appears more than once, the last entry wins.

     Returns:
         dict mapping gaia id -> converted fallback name.
     """
     names_by_id = {}
     for entry in participant_data:
         gaia_id = entry["id"]["gaia_id"]
         raw_name = entry["fallback_name"]
         fallback_name = self.convert_encoding(raw_name)
         print_cyan(
             f"gaia_id => {gaia_id}, fallback_name => {fallback_name}")
         names_by_id[gaia_id] = fallback_name
     return names_by_id
Exemple #3
0
 def __set_synonym(self, verb, parent_verb):
     """Insert a ``(synonym, synonym_parent)`` row into ``synonyms_dictionary``.

     Both values are embedded in double-quoted SQL literals, so double
     quotes inside string values are replaced by single quotes first.
     Non-string values are formatted unchanged.

     Any exception is reported through ``print_red`` and the
     cursor/connection pair is re-acquired with ``self.get_cursor()``;
     nothing is raised to the caller.

     NOTE(review): the statement is still assembled by string formatting.
     If ``transaction_bldr`` can accept bind parameters, a parameterized
     query would be the safer option - confirm its signature.
     """
     try:
         # An embedded double quote would terminate the SQL literal early
         # and turn the rest of the value into SQL.
         if isinstance(verb, str):
             verb = verb.replace('"', "'")
         if isinstance(parent_verb, str):
             parent_verb = parent_verb.replace('"', "'")
         query = """INSERT INTO synonyms_dictionary(synonym, synonym_parent) VALUES ("{}","{}")""".format(
             verb, parent_verb)
         print_cyan(f"set => {query}")
         self.transaction_bldr(query)
     except Exception as e:
         print_red(f"cannot insert synonym {verb}, {str(e)}")
         self.cursor, self.connection = self.get_cursor()
Exemple #4
0
 def __set_message(self, event_id, text, sender_id, fallback_name,
                   timestamp, conversation_id, event_type):
     """Insert one row into ``hangouts_chat``.

     Values are written in the order ``event_id, text, sender_id,
     fallback_name, timestamp, conversation_id, event_type``;
     ``sender_id`` and ``timestamp`` are coerced with ``int()``.
     Double quotes in ``text`` and ``fallback_name`` are replaced by
     single quotes because every value is embedded in a double-quoted SQL
     literal.

     Any exception (including a non-string ``text`` or a failing ``int()``
     coercion) is reported through ``print_red`` and the cursor/connection
     pair is re-acquired with ``self.get_cursor()``; nothing is raised.

     NOTE(review): the statement is still assembled by string formatting.
     If ``transaction_bldr`` can accept bind parameters, a parameterized
     query would be the safer option - confirm its signature.
     """
     try:
         text = text.replace('"', "'")
         # The display name is free text as well; an embedded double quote
         # would end the SQL literal early. Guarded so that a non-string
         # name is still formatted as before.
         if isinstance(fallback_name, str):
             fallback_name = fallback_name.replace('"', "'")
         query = """INSERT INTO hangouts_chat VALUES ("{}","{}","{}","{}","{}","{}","{}")""".format(
             event_id, text, int(sender_id), fallback_name, int(timestamp),
             conversation_id, event_type)
         print_cyan(f"set => {query}")
         self.transaction_bldr(query)
     except Exception as e:
         print_red(f"cannot insert message on id {event_id}, {str(e)}")
         self.cursor, self.connection = self.get_cursor()
Exemple #5
0
 def __set_message(self, guid, sender_name, timestamp_ms, message_type,
                   content, photos_list):
     """Insert one row into ``facebook_chat``.

     Values are written in the order ``guid, content, sender_name,
     timestamp_ms, message_type, photos_list`` (note that ``content``
     precedes ``sender_name``). Double quotes in ``content``,
     ``sender_name`` and ``photos_list`` are replaced by single quotes
     because every value is embedded in a double-quoted SQL literal.

     Any exception is reported through ``print_red`` and the
     cursor/connection pair is re-acquired with ``self.get_cursor()``;
     nothing is raised.

     NOTE(review): the statement is still assembled by string formatting.
     If ``transaction_bldr`` can accept bind parameters, a parameterized
     query would be the safer option - confirm its signature.
     """
     try:
         content = content.replace('"', "'")
         # Sender names and photo URIs are external text too; guard with
         # isinstance so non-string values are still formatted as before.
         if isinstance(sender_name, str):
             sender_name = sender_name.replace('"', "'")
         if isinstance(photos_list, str):
             photos_list = photos_list.replace('"', "'")
         query = """INSERT INTO facebook_chat VALUES ("{}","{}","{}","{}","{}","{}")""".format(
             guid, content, sender_name, timestamp_ms, message_type,
             photos_list)
         print_cyan(f"set => {query}")
         self.transaction_bldr(query)
     except Exception as e:
         print_red(f"cannot insert message on id {guid}, {str(e)}")
         self.cursor, self.connection = self.get_cursor()
Exemple #6
0
 def __set_translation(self, translate, non_translate, language,
                       translate_language):
     """Insert a translation pair into the ``translator`` table.

     Double quotes in ``translate`` and ``non_translate`` are swapped for
     single quotes before the values are placed into the statement. On any
     exception the failure is printed with ``print_red`` and a new
     cursor/connection pair is fetched from ``self.get_cursor()``.
     """
     statement = ("INSERT INTO translator(translate, non_translate, "
                  "language, translate_language) "
                  'VALUES ("{}","{}","{}","{}")')
     try:
         # Sequential on purpose: the error message below reports
         # ``translate`` as it stands when the failure happens.
         translate = translate.replace('"', "'")
         non_translate = non_translate.replace('"', "'")
         row = (translate, non_translate, language, translate_language)
         query = statement.format(*row)
         print_cyan(f"set => {query}")
         self.transaction_bldr(query)
     except Exception as e:
         print_red(f"cannot update message on id {translate}, {str(e)}")
         self.cursor, self.connection = self.get_cursor()
Exemple #7
0
 def __set_message(self, question_id, question, document_id, document_title,
                   sentence_id, sentence, label):
     """Insert one row into ``wiki_questions_and_answers``.

     Values are written in parameter order. Double quotes in the free-text
     fields (``question``, ``document_title``, ``sentence``) are replaced
     by single quotes because every value is embedded in a double-quoted
     SQL literal; non-string values are formatted unchanged.

     Any exception is reported through ``print_red`` and the
     cursor/connection pair is re-acquired with ``self.get_cursor()``;
     nothing is raised.

     NOTE(review): the statement is still assembled by string formatting.
     If ``transaction_bldr`` can accept bind parameters, a parameterized
     query would be the safer option - confirm its signature.
     """
     try:
         # isinstance (rather than an exact type check) also covers str
         # subclasses.
         if isinstance(question, str):
             question = question.replace('"', "'")
         if isinstance(document_title, str):
             document_title = document_title.replace('"', "'")
         if isinstance(sentence, str):
             sentence = sentence.replace('"', "'")
         query = """INSERT INTO wiki_questions_and_answers VALUES ("{}","{}","{}","{}","{}","{}","{}")""".format(
             question_id, question, document_id, document_title,
             sentence_id, sentence, label)
         print_cyan(f"set => {query}")
         self.transaction_bldr(query)
     except Exception as e:
         print_red(f"cannot insert message on id {document_id}, {str(e)}")
         self.cursor, self.connection = self.get_cursor()
 def __set_message(self, article_title, question, answer,
                   difficulty_from_questioner, difficulty_from_answerer,
                   article_file):
     """Insert one row into ``questions_and_answers``.

     Values are written in parameter order. Double quotes in the free-text
     fields (``article_title``, ``question``, ``answer``) are replaced by
     single quotes because every value is embedded in a double-quoted SQL
     literal; non-string values are formatted unchanged.

     Any exception is reported through ``print_red`` and the
     cursor/connection pair is re-acquired with ``self.get_cursor()``;
     nothing is raised.

     NOTE(review): the statement is still assembled by string formatting.
     If ``transaction_bldr`` can accept bind parameters, a parameterized
     query would be the safer option - confirm its signature.
     """
     try:
         # isinstance (rather than an exact type check) also covers str
         # subclasses.
         if isinstance(article_title, str):
             article_title = article_title.replace('"', "'")
         if isinstance(question, str):
             question = question.replace('"', "'")
         if isinstance(answer, str):
             answer = answer.replace('"', "'")
         query = """INSERT INTO questions_and_answers VALUES ("{}","{}","{}","{}","{}","{}")""".format(
             article_title, question, answer, difficulty_from_questioner,
             difficulty_from_answerer, article_file)
         print_cyan(f"set => {query}")
         self.transaction_bldr(query)
     except Exception as e:
         print_red(f"cannot insert message on id {article_title}, {str(e)}")
         self.cursor, self.connection = self.get_cursor()
Exemple #9
0
    def set_values_to_db(self):
        """Store the messages of every export file in the database.

        Each path from ``self.get_json_files()`` is read as JSON (decoded as
        iso-8859-1). The file must contain ``"participants"`` and
        ``"messages"``; messages are walked in reverse list order. For each
        message a guid is built from its position, the first participant's
        name (or the literal ``facebook_user`` when that name is
        "Konrad Uciechowski") and the message timestamp. Missing or
        unreadable ``content`` / ``photos`` fall back to an empty string.
        A guid that ``self.__find_message`` already knows is updated,
        otherwise the message is inserted.
        """
        for json_file in self.get_json_files():
            print_green(json_file)
            with open(json_file, buffering=1000, encoding='iso-8859-1') as f:
                j = json.load(f)
                print_blue(j)
                participants = j["participants"]
                messages_reversed = reversed(j["messages"])
                for index, message in enumerate(messages_reversed):
                    sender_name = self.convert_encoding(message["sender_name"])
                    timestamp_ms = message["timestamp_ms"]
                    message_type = self.convert_encoding(message["type"])
                    first_participant = participants[0]['name']
                    sender_message_name = self.convert_encoding(
                        first_participant)
                    guid = (f"{index}_{sender_message_name}_{timestamp_ms}"
                            if sender_message_name != "Konrad Uciechowski"
                            else f"{index}_facebook_user_{timestamp_ms}")

                    print_cyan(guid)

                    # Text is optional on a message; fall back to "".
                    try:
                        content = self.convert_encoding(message["content"])
                    except Exception as e:
                        print_yellow(f"cannot get content, {e}")
                        content = ""

                    # Photos are optional too; store their URIs as one
                    # comma-separated string.
                    try:
                        uris = [photo["uri"] for photo in message["photos"]]
                        photo_str = ', '.join(map(str, uris))
                    except Exception as e:
                        print_yellow(f"cannot get photos, {e}")
                        photo_str = ""

                    if self.__find_message(guid):
                        self.__update_message(guid, sender_name, timestamp_ms,
                                              message_type, content, photo_str)
                    else:
                        self.__set_message(guid, sender_name, timestamp_ms,
                                           message_type, content, photo_str)
Exemple #10
0
 def insert_parent(self, has_parent, parent_id, comment_id, parent, comment,
                   subreddit, time, score):
     """Insert one comment row into ``reddit_comments``.

     With ``has_parent`` truthy the ``parent`` column is written as well;
     otherwise it is left out of the statement. ``time`` is coerced with
     ``int()`` and stored in the ``unix`` column. On any exception the
     failure is printed with ``print_red`` and a new cursor/connection
     pair is fetched from ``self.get_cursor()``.
     """
     try:
         if has_parent:
             columns = ("parent_id", "comment_id", "parent", "comment",
                        "subreddit", "unix", "score")
             values = (parent_id, comment_id, parent, comment, subreddit,
                       int(time), score)
         else:
             columns = ("parent_id", "comment_id", "comment", "subreddit",
                        "unix", "score")
             values = (parent_id, comment_id, comment, subreddit,
                       int(time), score)
         column_sql = ", ".join(columns)
         # Each value becomes its own double-quoted literal.
         value_sql = ", ".join('"{}"'.format(value) for value in values)
         query = ("INSERT INTO reddit_comments "
                  + "(" + column_sql + ") VALUES (" + value_sql + ")")
         print_cyan(f"insert => {query}")
         self.transaction_bldr(query)
     except Exception as e:
         print_red(
             f"cannot insert parent comment of id {comment_id}, {str(e)}")
         self.cursor, self.connection = self.get_cursor()
Exemple #11
0
 def __set_message(self, ROWID, guid, text, handle_id, service, account, date, date_read,
                   date_delivered,
                   is_delivered,
                   is_finished, is_emote, is_from_me, is_empty, is_delayed, is_auto_reply, is_prepared, is_read,
                   is_system_message, is_sent, has_dd_results, cache_has_attachments, item_type, group_title,
                   is_expirable, message_source, ck_record_id, destination_caller, is_spam):
     """Insert one row into ``imessage_chat``.

     Double quotes in ``text`` are replaced by single quotes, the id, date
     and flag fields are coerced with ``int()``, and all values are placed
     into a string-formatted INSERT that is handed to
     ``self.transaction_bldr``.

     Any exception (including a non-string ``text`` or a value ``int()``
     cannot convert) is reported through ``print_red`` and the
     cursor/connection pair is re-acquired with ``self.get_cursor()``;
     nothing is raised to the caller.

     NOTE(review): the values are emitted in the order of the ``format``
     arguments below (``guid`` first, ``ROWID`` last), which differs from
     the parameter order. The INSERT names no columns, so this presumably
     mirrors the table's column order - confirm against the schema.
     """
     try:
         # Values are embedded in double-quoted SQL literals, so double
         # quotes inside the message text are swapped for single quotes.
         text = text.replace('"', "'")
         query = """INSERT INTO imessage_chat VALUES ("{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}")""".format(
             guid,
             text,
             int(handle_id),
             int(date),
             int(date_read),
             int(date_delivered),
             int(is_delivered),
             int(is_finished),
             int(is_emote),
             int(is_from_me),
             int(is_empty),
             int(is_delayed),
             int(is_auto_reply),
             int(is_prepared),
             int(is_read),
             int(is_system_message),
             int(is_sent),
             int(has_dd_results),
             int(is_spam),
             int(cache_has_attachments),
             int(item_type),
             # group_title, destination_caller, ck_record_id, service and
             # account are passed through without quote replacement.
             group_title,
             int(is_expirable),
             int(message_source),
             destination_caller,
             ck_record_id,
             service,
             account,
             ROWID)
         print_cyan(f"set => {query}")
         self.transaction_bldr(query)
     except Exception as e:
         # Best-effort insert: report the failure and fetch a new
         # cursor/connection pair instead of propagating the error.
         print_red(f"cannot update message on id {guid}, {str(e)}")
         self.cursor, self.connection = self.get_cursor()