def get_surveys(self):
    """Populate ``self.surveys`` on first use and log what was found.

    Does nothing when ``self.surveys`` is already non-empty. Otherwise every
    item returned by ``get_csv_files(self.input_dir)`` is wrapped in an
    ``InputFile`` numbered from 1, and the resulting list is logged.
    """
    if self.surveys:
        return

    csv_names = get_csv_files(self.input_dir)
    self.surveys = [
        InputFile(number, csv_name)
        for number, csv_name in enumerate(csv_names, 1)
    ]

    logger.info("Surveys found: {}".format(len(self.surveys)))
    for entry in self.surveys:
        logger.info("\t--> {}.{}".format(entry.id, entry.name))
def main():
    """Parse the command line and export the parsed survey data.

    Unless ``args.only_to_db`` is set, the five tab files are written first
    and the MySQL export runs only when ``args.write_to_db`` is set. With
    ``args.only_to_db`` the tab files are skipped and only the MySQL export
    runs.
    """
    global args, PROBLEMS
    PROBLEMS = 0
    parse_cli_opts()
    file_parser = surveyDI_lib.Parser(args.input_dir)

    if args.only_to_db:
        logger.info("Writing only to database.")
    else:
        # Surveys.tab, Questions.tab, SurveysQuestions.tab, Respondents.tab
        # and QuestionResponses.tab, written in that order.
        tab_exports = (
            (file_parser.write_surveys, Globals.OPUT_S),
            (file_parser.write_questions, Globals.OPUT_Q),
            (file_parser.write_surveysquestions, Globals.OPUT_SQ),
            (file_parser.write_respondents, Globals.OPUT_R),
            (file_parser.write_responses, Globals.OPUT_QR),
        )
        for write_tab, file_name in tab_exports:
            write_tab(make_output_path(file_name))

        if not args.write_to_db:
            return

    # Reached either in database-only mode or when the database export was
    # requested in addition to the tab files.
    file_parser.write_all_to_mysql(
        Globals.SERVER_NAME, Globals.USER, Globals.PASS, Globals.DB_NAME)
def read_csv2dict(input_file):
    """
    Read a csv file in a csv reader dictionary

    The file's lines are loaded into memory before the file is closed, so the
    returned reader stays usable after this function returns. (Handing back a
    reader bound to the file object itself would fail on first iteration,
    because the ``with`` block has already closed the file by then.)

    :input: input_file - csv input file
    :return: a csv.DictReader over the lines of the file
    """
    logger.info("Reading input file {}".format(input_file))
    with open(input_file) as csv_file:
        # Iterating the file yields lines with their line endings intact,
        # which is what the csv module needs to parse quoted multi-line
        # fields correctly.
        lines = list(csv_file)
    return csv.DictReader(lines)
def get_respondents(self):
    """Fill ``self.respondents`` and ``self.qresponses`` from the surveys.

    Questions are loaded first via ``self.get_questions()``. When
    ``self.respondents`` is already non-empty nothing else happens.

    For every survey file the header row is split at the "Custom Data"
    column: columns up to and including it describe the respondent, the
    columns after it are questions. Each data row contributes

    * one entry in ``self.respondents``: the respondent columns with the
      file id inserted at position 0, and
    * one ``(question id, user id, response)`` tuple in ``self.qresponses``
      per remaining question column, where the user id is the first
      respondent column of the row.

    Any ordinary exception raised while processing a file is reported
    through ``write_exception`` and processing moves on to the next file.
    """
    self.get_questions()
    if not self.respondents:
        questions_delim = "Custom Data"
        self.get_surveys()
        for input_file in self.surveys:
            try:
                with open(input_file.name, 'rb') as csv_file:
                    fileid = input_file.get_id()
                    reader = csv.reader(csv_file)
                    headers = next(reader)
                    qstart_idx = headers.index(questions_delim) + 1
                    questions = headers[qstart_idx:]

                    # Questions flagged as duplicates that occur in this file.
                    dup_qs = [
                        dup for dup in self.qduplicates
                        if fileid in dup.fileid
                    ]

                    # Keep only the first column of every duplicated question.
                    # NOTE(review): indexes are deleted in the order stored in
                    # ``order``; if those are ascending, each deletion shifts
                    # the later ones. Confirm against Question.order before
                    # changing this.
                    for dup in dup_qs:
                        for idx in dup.order[1:]:
                            del questions[idx]

                    for row in reader:
                        # Respondent part of the row, tagged with the file id.
                        user_details = row[:qstart_idx]
                        user_details.insert(0, fileid)
                        self.respondents.append(user_details)

                        # Response part of the row.
                        user_responses = row[qstart_idx:]
                        for dup in dup_qs:
                            candidates = [
                                user_responses[idx] for idx in dup.order
                            ]
                            # Drop answers that merely occur inside the
                            # question text itself.
                            valid_responses = [
                                answer for answer in candidates
                                if answer not in dup.text
                            ]
                            # Store the concatenated valid answers in the
                            # first column and remove the other columns.
                            user_responses[dup.order[0]] = ";".join(
                                valid_responses)
                            for idx in dup.order[1:]:
                                del user_responses[idx]

                        # Position 0 is the inserted file id, so the first
                        # original column sits at index 1.
                        user_id = user_details[1]
                        for qtext, response in zip(questions, user_responses):
                            question = self.get_question_by_text(qtext)
                            self.qresponses.append(
                                (question.id, user_id, response))
            except Exception:
                # Deliberately broad: one bad file must not abort the whole
                # run. Unlike a bare ``except``, this still lets
                # KeyboardInterrupt and SystemExit propagate.
                write_exception(
                    "While reading file '{}'".format(input_file))
        logger.info("Respondents found: {}".format(len(self.respondents)))
def write_all_to_mysql(self, server_name, user, passw, db_name):
    """Export every parsed collection to its MySQL table.

    Opens a connection with ``connect(server_name, user, passw, db_name)``
    and, inside ``with conn:``, writes the surveys, questions,
    survey-questions, respondents and responses tables in that order through
    ``write_sql_table``.
    """
    logger.info("Writing do database {}:".format(db_name))
    conn = connect(server_name, user, passw, db_name)
    with conn:
        cur = conn.cursor()

        def dump(table, header, rows):
            # All tables share the same cursor and target database.
            write_sql_table(cur, db_name, table, header, rows)

        survey_rows = [(s.id, s.name) for s in self.surveys]
        dump(self.stable, self.fheader, survey_rows)

        question_rows = [(q.id, q.text) for q in self.questions]
        dump(self.qtable, self.qheader, question_rows)

        dump(self.sqtable, self.sqheader, self.squestions)
        dump(self.rtable, self.rheader, self.respondents)
        dump(self.qrtable, self.qrheader, self.qresponses)
def write_to_csv(output_file, headers, values_list, delim='\t'):
    """
    Write header and corresponding values to a csv file

    Failures are reported through ``write_exception`` together with the row
    that was being written (the header row if nothing else was reached yet);
    they are not re-raised.

    :input: desired output file name
            a list representing the csv file header
            a list of lists, each representing the corresponding values
            delimiter
    :return: None
    """
    # Bound before the ``try`` so the error report below never hits an
    # unbound name when the failure happens before the first data row
    # (e.g. the file cannot be opened).
    values = headers
    try:
        logger.info("\tWriting file {}...".format(output_file))
        # Binary mode is what the Python 2 csv module expects.
        with open(output_file, 'wb') as csv_file:
            writer = csv.writer(csv_file, delimiter=delim)
            writer.writerow(headers)
            for values in values_list:
                writer.writerow(values)
    except Exception:
        # Broad on purpose (best-effort export), but no longer a bare
        # ``except`` so KeyboardInterrupt/SystemExit still propagate.
        write_exception("Trying to write values: {}".format(values))
def get_questions(self):
    """Collect the distinct questions of all surveys into ``self.questions``.

    Does nothing when ``self.questions`` is already non-empty. Otherwise the
    header row of every survey file is read; the columns after the
    "Custom Data" column are the questions. One ``Question`` is created per
    distinct question text, numbered from 1 in the order the texts are first
    seen, and every further occurrence is recorded on it through
    ``add_fieldid`` / ``add_order``. ``self.qduplicates`` is set to the
    questions for which ``is_duplicate()`` is true, and statistics are
    logged.

    A file that cannot be read, is empty, is malformed CSV or lacks the
    "Custom Data" column is reported through ``write_exception`` and
    skipped.
    """
    if self.questions:
        return

    questions_delim = "Custom Data"
    occurrences = {}    # question text -> [(file id, column order), ...]
    ordered_texts = []  # distinct texts in first-seen order
    total = 0

    self.get_surveys()
    for input_file in self.surveys:
        try:
            with open(input_file.name, 'rb') as csv_file:
                fileid = input_file.get_id()
                headers = next(csv.reader(csv_file))
            qstart_idx = headers.index(questions_delim) + 1
        except (IOError, ValueError, StopIteration, csv.Error):
            # IOError: unreadable file; StopIteration: no header row;
            # ValueError: delimiter column missing; csv.Error: bad CSV.
            write_exception(
                "While reading file '{}'".format(input_file))
            continue

        for qorder, text in enumerate(headers[qstart_idx:]):
            total += 1
            if text not in occurrences:
                occurrences[text] = []
                ordered_texts.append(text)
            occurrences[text].append((fileid, qorder))

    # Build the Parser questions list. Grouping the occurrences up front
    # avoids rescanning every header column for each distinct question, and
    # first-seen ordering keeps the assigned ids reproducible between runs.
    for number, text in enumerate(ordered_texts, 1):
        question = self.get_question_by_text(text)
        for qfileid, qorder in occurrences[text]:
            if not question:
                question = Question(number, text, qfileid, qorder)
                self.questions.append(question)
            else:
                question.add_fieldid(qfileid)
                question.add_order(qorder)

    # display questions statistics
    commons = [q for q in self.questions if len(q.fileid) > 1]
    self.qduplicates = [q for q in commons if q.is_duplicate()]
    logger.info("Distinct questions found: {} (total {})".format(
        len(self.questions), total))
    logger.info("Common questions to multiple input files: {}".format(
        len(commons) - len(self.qduplicates)))
    logger.info(
        "Duplicate questions in the same input file: {}".format(
            len(self.qduplicates)))
def _quote_identifier(name):
    """Return *name* as a backtick-quoted MySQL identifier."""
    # Inside a quoted identifier a backtick is escaped by doubling it.
    return "`" + str(name).replace("`", "``") + "`"


def write_sql_table(cursor, db_name, table_name, headers_list, values_list):
    """
    Write info to an SQL table.

    :input: cursor - MySQLdb cursor object as obtained prior to connecting
                to the database
            db_name - name of the database to create the table in
            table_name - name of table to be created
            headers_list - table headers
            values_list - list of lists each containing a table row
    :return: None
    :notes: Tables will be dropped and recreated if already exist
            Column names will be the CSV headers with spaces and round
            brackets removed.
            Every column is created as VARCHAR(3000) and every value is
            stored as its ``str()`` form.
            Row values are passed to the driver as query parameters, so
            quotes and backslashes in the data cannot break the statement.
            A row that fails to insert is reported through
            ``write_exception`` and skipped.
    """
    target = "{}.{}".format(db_name, table_name)

    logger.info("\n\tDropping table {}.{}...".format(db_name, table_name))
    cursor.execute("DROP TABLE IF EXISTS {}".format(target))

    logger.info("\tCreating table {}.{}...".format(db_name, table_name))
    # Strip spaces and round brackets from the headers.
    db_headers = [
        "".join(ch for ch in header if ch not in "() ")
        for header in headers_list
    ]
    columns = [_quote_identifier(header) for header in db_headers]
    # Qualified with db_name so the table is created in the same place the
    # DROP and INSERT statements address.
    create_cmd = "CREATE TABLE {}({})".format(
        target,
        ", ".join(column + " VARCHAR(3000)" for column in columns))
    cursor.execute(create_cmd)

    logger.info("\tPopulating table {}.{}...".format(db_name, table_name))
    # The driver applies %-formatting to parameterized statements, so any
    # literal '%' in the table or column names has to be doubled.
    insert_prefix = "INSERT INTO {0}({1})".format(
        target, ", ".join(columns)).replace("%", "%%")
    for row in values_list:
        params = [str(value) for value in row]
        insert_cmd = "{0} VALUES({1})".format(
            insert_prefix, ", ".join(["%s"] * len(params)))
        try:
            cursor.execute(insert_cmd, params)
        except Exception:
            if len(db_headers) != len(row):
                write_exception('''
                Different sizes in header ({}) and row ({}):
                header:{}
                row: {}'''.format(len(db_headers), len(row), db_headers,
                                  row))
            else:
                write_exception(
                    "SQL error while executing command:\n\t{}"
                    "\n\twith values: {}".format(insert_cmd, params))