def gerar_planilha_gastos_terceiros(compras):
    titulo = 'Gastos Terceiros'
    rows = [[titulo]]
    for compra in compras:
        categoria = compra.categoria.descricao
        descricao = compra.descricao
        if compra.parcelas > 1:
            descricao += ' %02d/%02d' % (compra.parcela_atual, compra.parcelas)
        valor = format_moeda_planilha(compra.valor_real)
        rows.append([categoria, descricao, valor])
    for value in compras.values('categoria__descricao').annotate(Sum('valor_real')):
        rows.append([
            'Total ' + value['categoria__descricao'],
            '',
            format_moeda_planilha(value['valor_real__sum']),
        ])
    total = compras.aggregate(Sum('valor_real'))
    rows.append(['Total', '', format_moeda_planilha(total['valor_real__sum'])])
    io = BytesIO()
    save_data(io, {titulo: rows})
    io.seek(0)
    return io
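# A minimal usage sketch for serving the in-memory ODS above from a Django
# view; the `Compra` model, its `terceiro` filter and the view name are
# assumptions for illustration, not part of the original snippet.
from django.http import HttpResponse

def exportar_gastos_terceiros(request):
    compras = Compra.objects.filter(terceiro=True)  # hypothetical queryset
    buffer = gerar_planilha_gastos_terceiros(compras)
    response = HttpResponse(
        buffer.read(),
        content_type='application/vnd.oasis.opendocument.spreadsheet')
    response['Content-Disposition'] = 'attachment; filename="gastos.ods"'
    return response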
def valid_records():
    # `data` and `analysis_file` are module-level globals shared with the
    # other reporting functions in this script.
    global totaljobsdict
    global jobsitedict
    # data = OrderedDict()
    # data.update({"File unique id list": [[1, 2, 3], [4, 5, 6]]})
    # data.update({"Invalid records per site": [[]]})
    # data.update({"Valid records per site": [[]]})
    # save_data("pcdataanalysisresults.ods", data)
    # subtract the invalid counts from the totals to get valid records per site
    validjobsdict = {
        key: totaljobsdict[key] - jobsitedict.get(key, 0)
        for key in totaljobsdict.keys()
    }
    # utility.write_to_file(config.ConfigManager().PCDataAnalysisResultsFile,
    #                       'a', 'Total valid records per site: ')
    # utility.write_to_file(config.ConfigManager().PCDataAnalysisResultsFile,
    #                       'a', str(validjobsdict))
    listdata = []
    for val in validjobsdict:
        listdata.append([val, validjobsdict[val]])
    data.update({"Valid records per site": listdata})
    save_data(analysis_file, data)
def run_helper(args):
    lst, thread, ods_dir = args
    pre = 'run' + str(thread)
    data = {}
    drop_count = 0
    for filename in lst:
        log('Processing', filename, pre=pre)
        reponame = filename[:-5]
        inpath = os.path.join(INDIR, filename)
        metrics = calcMetrics(inpath)
        if metrics is not None:
            # None indicates an empty group (or a group with only 1 person)
            data[reponame] = metrics
        else:
            log('Dropped', filename, pre=pre)
            drop_count += 1
    # output the aggregated metrics
    metric_labels = list(list(data.values())[0].keys())
    symlog_header = ['repository'] + metric_labels
    symlog_data = []
    for name, metrics in data.items():
        lst = [metrics[label] for label in metric_labels]
        symlog_data.append([name] + lst)
    out_data = OrderedDict()
    out_data.update({"Big5": [symlog_header, *symlog_data]})
    save_data(os.path.join(ods_dir, 'Big5_metrics_{0}.ods'.format(thread)),
              out_data)
    log(drop_count, 'repositories were dropped', pre=pre)
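# `calcMetrics` is defined elsewhere; the aggregation above only assumes it
# returns a {metric_name: value} dict per repository, or None for groups too
# small to score. A stand-in for testing could look like this (the metric
# names are invented):
def calcMetrics_stub(inpath):
    return {'openness': 0.42, 'conscientiousness': 0.61}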
def importods(inp, outp):
    data = get_data(inp, encoding="utf-8")
    sheets = {}
    headers = [
        "Datum", "Vorname", "Nachname", "Strasse", "Hausnummer", "PLZ",
        "Stadt", "Telefon", "Code"
    ]
    # print(json.dumps(data))
    for key in data.keys():
        # print("key:", key)
        val1 = data.get(key)
        # print("val1:", val1)
        for l1 in val1:
            # print("  l1:", l1)
            if len(l1) == 0 or l1[0] == "Datum":
                continue
            datum = l1[0][0:10]
            if datum not in sheets:
                sheets[datum] = []
            l1 = l1[0:8]  # Datum - Telefon
            l1.append("")  # add empty code value
            sheets[datum].append(l1)
    # print("sheets1", sheets)
    for datum in sheets.keys():
        sheets[datum].sort(key=lambda l1: l1[0])
        sheets[datum].insert(0, headers)
    # print("sheets2", sheets)
    # print(json.dumps(sheets, indent=4))
    save_data(outp, sheets, encoding="utf-8")
def test_issue_10():
    test_file_name = "test_issue_10.ods"
    from pyexcel_ods3 import save_data

    content = {"test": [[1, 2]]}
    save_data(test_file_name, content)
    save_data(test_file_name, content)
    assert os.path.exists(test_file_name)
    assert os.path.exists(test_file_name + ".bak") is False
    os.unlink(test_file_name)
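# Round-trip sketch: pyexcel_ods3's get_data reads back what save_data wrote,
# as rows of lists keyed by sheet name (the file name is illustrative only).
from pyexcel_ods3 import get_data, save_data

save_data("roundtrip.ods", {"Sheet 1": [[1, 2], [3, 4]]})
assert get_data("roundtrip.ods")["Sheet 1"] == [[1, 2], [3, 4]]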
def write_typo_ods_sheet(self, vat_number, typo):
    content_sheet = get_data(self.referencialSuppplierSpreadsheet)
    for line in content_sheet['Fournisseur']:
        if line and line[1] == vat_number:
            try:
                line[8] = typo
            except IndexError:
                line.append(typo)
    save_data(self.referencialSuppplierSpreadsheet, content_sheet)
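# Hypothetical call, assuming `referential` is an instance of the class that
# holds referencialSuppplierSpreadsheet (VAT number and typology invented):
referential.write_typo_ods_sheet("FR40303265045", "B2B")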
def save(self):
    if not self.filename:
        raise RuntimeError('No file name specified for ODS document. '
                           'Unable to save ODS document.')
    if not self.filename.endswith('.ods'):
        raise RuntimeError('Not an ODS file: {}'.format(self.filename))
    records = [tuple(chord) for chord in self._chords]
    data = OrderedDict()
    data.update({'Chords': records})
    py_ods.save_data(self.filename, data)
def __call__(self):
    data = {
        "sort_on": "Date",
        "sort_order": "reverse",
        "path": "/".join(self.context.getPhysicalPath()),
    }
    for k in self.request.form:
        v = self.request.form.get(k, None)
        if v and v != "None":
            data[k] = v
    if data:
        query = self.get_query(data=data)
        brains = self.conflict_manager.unrestricted_prenotazioni(**query)
    else:
        brains = []
    data = {
        "Sheet 1": [[
            "Nome completo",
            "Stato",
            "Postazione",
            "Tipologia prenotazione",
            "Email",
            "Data prenotazione",
            "Codice prenotazione",
        ]]
    }
    for brain in brains:
        obj = brain.getObject()
        data["Sheet 1"].append([
            brain.Title,
            self.get_prenotazione_state(obj),
            getattr(obj, "gate", "") or "",
            getattr(obj, "tipologia_prenotazione", "") or "",
            getattr(obj, "email", "") or "",
            self.prenotazioni_week_view.localized_time(brain["Date"])
            + " - "
            + self.prenotazioni_week_view.localized_time(brain["Date"],
                                                         time_only=True),
            obj.getBookingCode(),
        ])
    now = DateTime()
    filename = "prenotazioni_{}.ods".format(now.strftime("%Y%m%d%H%M%S"))
    filepath = "{0}/{1}".format(tempfile.mkdtemp(), filename)
    save_data(filepath, data)
    streamed = filestream_iterator(filepath)
    mime = "application/vnd.oasis.opendocument.spreadsheet"
    self.request.RESPONSE.setHeader(
        "Content-type", "{0};charset={1}".format(mime, "utf-8"))
    self.request.RESPONSE.setHeader("Content-Length", str(len(streamed)))
    self.request.RESPONSE.setHeader(
        "Content-Disposition", 'attachment; filename="{}"'.format(filename))
    return streamed
def write_variables():
    # `data`, `analysis_file` and `listdata_uniqueids` are module-level
    # globals shared with the other reporting functions in this script.
    global totalrecords
    global invalidrecords
    global emptydesc
    global incompletedesc
    global smalldesc
    global nonedesc
    global nodesc
    global totaljobsdict
    global jobsitedict
    # utility.write_to_file(config.ConfigManager().PCDataAnalysisResultsFile,
    #                       'a', 'Total invalid records: ' + str(invalidrecords))
    # utility.write_to_file(config.ConfigManager().PCDataAnalysisResultsFile,
    #                       'a', 'Total empty descriptions: ' + str(emptydesc))
    # utility.write_to_file(config.ConfigManager().PCDataAnalysisResultsFile,
    #                       'a', 'Total incomplete descriptions: ' +
    #                       str(incompletedesc))
    # utility.write_to_file(config.ConfigManager().PCDataAnalysisResultsFile,
    #                       'a', 'Total descriptions below 20 chars: ' +
    #                       str(smalldesc))
    # utility.write_to_file(config.ConfigManager().PCDataAnalysisResultsFile,
    #                       'a', 'Total null descriptions: ' + str(nonedesc))
    # utility.write_to_file(config.ConfigManager().PCDataAnalysisResultsFile,
    #                       'a', 'Total records without description tag: ' +
    #                       str(nodesc))
    # utility.write_to_file(config.ConfigManager().PCDataAnalysisResultsFile,
    #                       'a', 'Invalid records per site: ')
    # utility.write_to_file(config.ConfigManager().PCDataAnalysisResultsFile,
    #                       'a', str(jobsitedict))
    listdata = []
    for val in jobsitedict:
        listdata.append([val, jobsitedict[val]])
    data.update({"Invalid records per site": listdata})
    # utility.write_to_file(config.ConfigManager().PCDataAnalysisResultsFile,
    #                       'a', 'Total records: ' + str(totalrecords))
    # utility.write_to_file(config.ConfigManager().PCDataAnalysisResultsFile,
    #                       'a', 'Total records per site: ')
    # utility.write_to_file(config.ConfigManager().PCDataAnalysisResultsFile,
    #                       'a', str(totaljobsdict))
    listdata = []
    for val in totaljobsdict:
        listdata.append([val, totaljobsdict[val]])
    data.update({"Total records per site": listdata})
    # analysis_file is e.g. /mnt/nlpdata/pcdataanalysisresults.ods or
    # config.ConfigManager().PCDataAnalysisResultsFile
    data.update({"File unique id list": listdata_uniqueids})
    save_data(analysis_file, data)
def emp_export_ods(czn, fields):
    """Export selected Employer fields to an ODS file.

    :param czn: owner user id as a string; '0' exports all employers
    :param fields: iterable of field names to include
    :return: HttpResponse serving the ODS file as an attachment
    """
    all_fields = Employer._meta.get_fields(include_parents=False,
                                           include_hidden=False)
    now = datetime.now()
    file_name = 'export' + now.strftime('%y%m%d-%H%M%S') + '.ods'
    file = settings.EXPORT_FILE + file_name
    if czn != '0':
        emps = Employer.objects.filter(Owner__user=czn)
    else:
        emps = Employer.objects.all()
    data_field = []
    for field in all_fields:
        if field.name in fields:
            data_field.append(field.verbose_name)
    data_emp = [data_field]
    for emp in emps:
        data_field = []
        for field in all_fields:
            if field.name in fields:
                value = getattr(emp, field.name)
                if value:
                    if isinstance(value, UserProfile):
                        data_field.append(value.user.get_full_name())
                    elif isinstance(value, (datetime, date)):
                        # %Y (calendar year), not %G (ISO week year)
                        data_field.append(value.strftime('%d-%m-%Y'))
                    elif field.name == 'Status':
                        data_field.append(dict(STATUS_CHOICES).get(emp.Status))
                    else:
                        data_field.append(value)
                else:
                    data_field.append('')
        data_emp.append(data_field)
    data = OrderedDict()
    data.update({'Данные': data_emp})
    save_data(file, data)
    with open(file, 'rb') as fp:
        response = HttpResponse(fp.read())
    # mimetypes.guess_type returns a (type, encoding) tuple; use the type part
    file_type, _ = mimetypes.guess_type(file)
    if file_type is None:
        file_type = 'application/octet-stream'
    response['Content-Type'] = file_type
    response['Content-Length'] = str(os.stat(file).st_size)
    response['Content-Disposition'] = "attachment; filename=" + file_name
    os.remove(file)
    return response
def helper(exp_name):
    orig_path = os.path.join(SYM, exp_name, 'SYMLOG_metrics_original.ods')
    test_path = os.path.join(SYM, exp_name, 'SYMLOG_metrics_test.ods')
    out_path = os.path.join(SYM, exp_name, 'SYMLOG_metrics.ods')
    orig_data = get_data(orig_path)['SYMLOG']
    test_data = get_data(test_path)['SYMLOG']
    # keep one header row, then every non-header row from both files
    all_data = orig_data[0:1] + [
        row for row in orig_data + test_data if row[0] != 'repository'
    ]
    save_data(out_path, {'SYMLOG': all_data})
def save_new_sheet(name_file, sheet_name, content):
    data = OrderedDict()
    first_line_sheet = [
        "Qual algoritmo", "Qual arquivo", "Qual instancia binaria",
        "Qual instancia decimal", "Quantos frascos", "Complexidade Teorica",
        "Tempo de CPU (ms)", "Razao"
    ]
    final_data = [first_line_sheet]
    for content_part in content:
        final_data.append(content_part)
    data.update({sheet_name: final_data})
    save_data(name_file, data)
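# Example call (all values invented): one data row matching the eight header
# columns declared above.
save_new_sheet(
    "resultados.ods",
    "execucao_1",
    [["forca_bruta", "entrada01.txt", "10110", "22", 5, "O(2^n)", 12.5, 0.8]],
)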
def run(self):
    global tot
    document = {}
    document['sessao'] = [self.labels] + self.documents_to_create['events']
    # TODO: append the list of formulas below after the event data
    #
    # ['=VLOOKUP(C2;$legenda.$A$2:$K$20;3;0)&"("&K2&" "&L2&" "&M2&")"',
    #  '=VLOOKUP($C2;$legenda.$A$2:$K$20;2;0)',
    #  '=IF(VLOOKUP($C2;$legenda.$A$2:$K$20;9;0)<>"vazio";VLOOKUP($C2;$legenda.$A$2:$K$20;9;0);"")',
    #  '=IF(VLOOKUP($C2;$legenda.$A$2:$K$20;10;0)<>"vazio";VLOOKUP($C2;$legenda.$A$2:$K$20;10;0);"")',
    #  '=IF(VLOOKUP($C2;$legenda.$A$2:$K$20;11;0)<>"vazio";VLOOKUP($C2;$legenda.$A$2:$K$20;11;0);"")',
    #  '=IF(VLOOKUP($C2;$legenda.$A$2:$K$20;7;0)<>"vazio";VLOOKUP($C2;$legenda.$A$2:$K$20;7;0);"")']
    pods.save_data(
        f'./Eventos/{self.documents_to_create["document_name"]}', document)
    tot += 1
    print(f'{tot} documents created so far.')
def FILE_export(self):
    try:
        if all([n == 0 for n in self.nums]):
            txt = "You cannot export an empty sheet."
            tkMess.showwarning(title="Phonologiter", message=txt)
            return
        try:
            op = "New Phonology.ods" if self.opened is None else self.opened
            if op.endswith(".phn"):
                op = op[:-4] + ".ods"
            f = tkFile.asksaveasfile(mode="wb", initialfile=op,
                                     defaultextension=".ods")
        except PermissionError:
            txt = ("Error saving file. That file may be open in another "
                   "program. Please close out of that program before trying "
                   "again.")
            tkMess.showwarning(title="Phonologiter", message=txt)
            return
        if f is None:
            return
        if f.name == "":
            txt = "You must name your Phonology before exporting."
            tkMess.showwarning(title="Phonologiter", message=txt)
            return False
        conIpa = self.manifest_IPA_cons(self.root, True)
        vowIpa = self.manifest_IPA_vows(self.root, True)
        othIpa = self.manifest_IPA_oths(self.root, True)
        file = OrderedDict()  # from collections import OrderedDict
        if self.nums[0] > 0:
            file.update({"Consonants": self.EXP_compose(conIpa)})
        if self.nums[1] > 0:
            file.update({"Vowels": self.EXP_compose(vowIpa)})
        if self.nums[2] > 0:
            file.update({"Other Symbols": self.EXP_compose(othIpa, True)})
        ods.save_data(f, file)
        f.close()
    except Exception as e:
        self.throw_err("#FE_UNKERR", e)
def write_ods(self, file: Union[str, BinaryIO]) -> None:
    """
    Writes an ODS (OpenOffice spreadsheet document) to a file.

    Args:
        file: filename or file-like object
    """
    if ODS_VIA_PYEXCEL:
        # use pyexcel_ods3
        data = self._get_pyexcel_data(convert_for_pyexcel_ods3)
        pyexcel_ods3.save_data(file, data)
    else:
        # use odswriter
        if isinstance(file, str):
            # it's a filename
            with open(file, "wb") as binaryfile:
                return self.write_ods(binaryfile)  # recurse once
        # noinspection PyCallingNonCallable
        with ODSWriter(file) as odsfile:
            valid_name_dict = self.get_pages_with_valid_sheet_names()
            for page, title in valid_name_dict.items():
                sheet = odsfile.new_sheet(name=title)
                page.write_to_odswriter_ods_worksheet(sheet)
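# Both call styles the method above supports; `writer` is a hypothetical
# instance of the containing class, and the file name is made up.
writer.write_ods("report.ods")           # by filename
with open("report.ods", "wb") as f:      # or with an open binary stream
    writer.write_ods(f)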
def gera_relatorio(arquivo, cursor, cabecalho):
    dicionario = {}
    lista = []
    # subprocess.call("clear", shell=True)
    print("Gerando PDF. Aguarde...")
    lista.append(cabecalho)
    lista.append([" "] * len(cabecalho))  # blank spacer row under the header
    for row in cursor.fetchall():
        lista.append([str(campo) for campo in row])
    dicionario['Relatorio'] = lista
    pods.save_data(arquivo + ".ods", dicionario)
    print("\n")
    subprocess.call("soffice --headless --convert-to pdf *.ods", shell=True)
    subprocess.call("rm *.ods", shell=True)
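# A narrower variant of the conversion step above (a sketch, not the original
# behavior): convert only the file just written instead of every .ods in the
# working directory, and delete it with os.remove rather than shelling out.
import os
import subprocess

def converte_para_pdf(arquivo):
    subprocess.call(
        ["soffice", "--headless", "--convert-to", "pdf", arquivo + ".ods"])
    os.remove(arquivo + ".ods")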
def download_ranking(request, collection_id):
    """
    Download ODS file with the ranking.

    Receives the following GET parameters:
      * group (number, required): group ID
      * start (date YYYY-MM-DD, required)
      * end (date YYYY-MM-DD, required)
    """
    collection = get_object_or_404(Collection, pk=collection_id)
    result_form = DownloadRankingForm(request.GET)
    if not result_form.is_valid():
        return HttpResponseNotFound(str(result_form.errors))  # invalid parameters
    group = get_object_or_404(Group, pk=result_form.cleaned_data['group'])
    start = result_form.cleaned_data.get('start')
    end = result_form.cleaned_data.get('end')
    # Extend 'end' to 23:59:59 to cover today's latest submissions; otherwise
    # the ranking is not updated until the next day (plain dates have time
    # 00:00:00).
    end = datetime(end.year, end.month, end.day, 23, 59, 59)
    start = datetime(start.year, start.month, start.day, 0, 0, 0)
    sheet_rows = []
    # Sheet header: collection name, group and dates in the first 4 rows
    sheet_rows.append([gettext('Colección'), collection.name_md])
    sheet_rows.append([gettext('Grupo'), str(group)])
    sheet_rows.append([gettext('Desde'), str(start)])
    sheet_rows.append([gettext('Hasta'), str(end)])
    sheet_rows.append([])
    # Table header [Pos., User, Exercises..., Score, Solved] in row 6
    sheet_rows.append([gettext("Pos."), gettext("Usuario")]
                      + [problem.title_md for problem in collection.problems()]
                      + [gettext("Puntuación"), gettext("Resueltos")])
    # Ranking row by row
    for user in collection.ranking(start, end, group):
        sheet_rows.append([user.pos, user.username]
                          + [cell_ranking(problem)
                             for _, problem in user.results.items()]
                          + [user.score, user.num_solved])
    # Save the ODS to memory and include it in the response
    buffer = io.BytesIO()
    save_data(buffer, {str(group): sheet_rows})
    buffer.seek(0)  # move to the beginning of the buffer
    return FileResponse(buffer, as_attachment=True, filename='ranking.ods')
def run(exp_name, reorient=False, num_threads=4):
    pre = 'run'
    log('running', exp_name, pre=pre)
    filenames = os.listdir(INDIR)
    ods_dir = os.path.join(OUTDIR, exp_name)
    if not os.path.exists(ods_dir):
        os.mkdir(ods_dir)
    plots_dir = os.path.join(OUTDIR, exp_name, 'plots')
    if not os.path.exists(plots_dir):
        os.mkdir(plots_dir)
    # allocate the inputs for multithreading
    filenames_lsts = []
    for i, filename in enumerate(filenames):
        index = i % num_threads
        if index == len(filenames_lsts):
            filenames_lsts.append([])
        filenames_lsts[index].append(filename)
    inputs = [[lst, i, plots_dir, ods_dir, reorient]
              for i, lst in enumerate(filenames_lsts)]
    # run the multithreadable function
    pool = Pool(processes=num_threads)
    pool.map(run_helper, inputs)
    # aggregate the data together
    filepaths = [
        os.path.join(ods_dir, 'SYMLOG_metrics_{0}.ods'.format(i))
        for i in range(num_threads)
    ]
    all_data = get_data(filepaths[0])
    key = list(all_data.keys())[0]
    for filepath in filepaths[1:]:
        new_data = get_data(filepath)
        # exclude the header row of each subsequent file
        all_data[key].extend(new_data[key][1:])
    out_path = os.path.join(ods_dir, 'SYMLOG_metrics.ods')
    save_data(out_path, all_data)
    log('done the run', pre=pre)
    print('done', exp_name)
def update_supplier_ods_sheet(self, _db):
    content_sheet = get_data(self.referencialSuppplierSpreadsheet)
    res = _db.select({
        'select': ['*'],
        'table': ['suppliers'],
        'where': ['status = ?'],
        'data': ['ACTIVE'],
    })
    try:
        # grab the (single) sheet name, then keep only its header row
        sheet_name = False
        for sheet in content_sheet:
            sheet_name = sheet
        if sheet_name:
            content_sheet[sheet_name] = content_sheet[sheet_name][:1]
            columns = ['name', 'vat_number', 'siret', 'siren', 'adress1',
                       'adress2', 'postal_code', 'city', 'typology',
                       'company_type']
            for supplier in res:
                line = [supplier[col] if supplier[col] is not None else ''
                        for col in columns]
                content_sheet[sheet_name].append(line)
    except IndexError:
        self.Log.error("IndexError while updating ods reference file.")
    save_data(self.referencialSuppplierSpreadsheet, content_sheet)
def add_word(self, newWord="None"):
    """
    Why the optional parameter: this function is called in two different
    scenarios
        - when the choice is 1: for adding a new word
        - when searching for a word that is not found, in which case the
          word is added automatically

    The set is popped and the words are put in a list: since I want to
    randomize the order of the words I pop them (set.pop() returns an
    arbitrary element).

    An OrderedDict needs to be created for saving the data to the ods sheet:
        - the keys are the sheet names
        - the values are the data in the sheet
        - each sublist is a row
        - values in each sublist correspond to columns
    """
    if newWord == 'None':
        newWord = input("Enter a word to add :\t")
    self.words.add(newWord)
    # copy the set, so the pops below don't destroy the master word list
    temporaryHolder = set(self.words)
    shuffledList = []
    for i in range(0, self.count):
        try:
            shuffledList.append([self.words.pop()])
        except KeyError:
            pass
    writeData = OrderedDict()
    writeData.update({"Sheet1": shuffledList})
    save_data(self.file, writeData)
    print(newWord,
          " has been added" if len(shuffledList) > self.count
          else "is already available")
    self.count += 1 if len(shuffledList) > self.count else 0
    self.total_words()
    self.words = temporaryHolder
def saveOds(self):
    data_list = list(self.lexicon)
    data_list.sort()
    wordlexicon = []
    for i in range(0, len(data_list)):
        wordlexicon.append([
            data_list[i],
            translate2chinese(data_list[i]),
            translate2english(data_list[i]),
        ])
        if i % 100 == 0:
            # checkpoint the partial results every 100 words
            data = OrderedDict()
            data.update({"lexicon": wordlexicon})
            save_data("lexicon.ods", data)
    data = OrderedDict()
    data.update({"lexicon": wordlexicon})
    save_data("lexicon.ods", data)
data = get_data(tickets_file)
redmine = Redmine(url, key=apikey, requests={'verify': False})
tickets = data.get(sheet)
for t in tickets[1:]:  # skip the header row
    custom_values = [{
        'id': cv.field_id,
        'value': t[cv.column_idx] if cv.column_idx < len(t) else ''
    } for cv in custom_fields]
    if t[issue_id] != '':
        print(f'Issue {t[issue_id]} already exists')
        # @TODO implement ticket update
        continue
    issue = redmine.issue.create(project_id=t[project_id],
                                 subject=t[subject],
                                 tracker_id=t[tracker_id],
                                 description=t[description],
                                 priority_id=t[priority_id],
                                 custom_fields=custom_values)
    id = issue['id']
    t[issue_id] = id
    print(f'Ticket {id} created!')
save_data(tickets_file, data)
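# The loop above assumes each entry of `custom_fields` pairs a Redmine
# custom-field id with the spreadsheet column it maps to; a minimal stand-in
# (ids and indexes invented) could be:
from collections import namedtuple

CustomField = namedtuple('CustomField', ['field_id', 'column_idx'])
custom_fields = [CustomField(field_id=1, column_idx=7),
                 CustomField(field_id=2, column_idx=8)]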
def test_issue_11():
    test_file = "test_file.ods"
    from pyexcel_ods3 import save_data

    save_data(test_file, {"generator": data_gen()})
    os.unlink(test_file)
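# The data_gen fixture is not shown; the test exercises save_data with a
# generator of rows as sheet content, so a stand-in consistent with it could
# be (row values invented):
def data_gen():
    for i in range(2):
        yield [i, i + 1]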
def do_summary():
    print("Doing Summary.\n")
    div_data = get_all_divs()
    main_divs = div_data[0]
    sub_divs = div_data[1]
    connected_divs = div_data[2]
    data = get_parsed_data(main_divs, sub_divs, connected_divs)
    if input("Would you like to continue? (y/n):\n") == 'n':
        print("\nRestarting.\n")
        return
    print("Working.\n")
    print("Creating Summary data.\n")
    summary = OrderedDict()
    for div in main_divs:
        summary.update({
            div + " Div Team": [["Team", "Quiz", "Place", "Score", "Points",
                                 "Errors", "Quiz No"]]
        })
        summary.update({
            div + " Div Quizzer": [["Quizzer", "Team", "Quiz", "Points",
                                    "Errors", "Jumps", "Quiz No"]]
        })
    all_count = 0
    for div in main_divs:
        print("Moving " + div + " Div Team.")
        count = 0
        for key in data.keys():
            quiz = data[key][1][1]
            if get_div(quiz, connected_divs) == div:
                for i in range(3):
                    summary[div + ' Div Team'].append(data[key][i + 1][0:6])
                summary[div + ' Div Team'][-3].append(
                    summary[div + ' Div Team'][-3][1])
                count = count + 1
        print(Fore.CYAN + (Style.BRIGHT if count != 0 else '') + str(count) +
              ' Entries moved in ' + div + ' Div Team\n' + Style.RESET_ALL)
        all_count = all_count + count
        print("Moving " + div + " Div Quizzer.")
        count = 0
        for key in data.keys():
            quiz = data[key][1][1]
            if get_div(quiz, connected_divs) == div:
                for i in range(15):
                    summary[div + ' Div Quizzer'].append(data[key][i + 6][0:6])
                summary[div + ' Div Quizzer'][-15].append(
                    summary[div + ' Div Quizzer'][-15][2])
                count = count + 1
        print(Fore.CYAN + (Style.BRIGHT if count != 0 else '') + str(count) +
              ' Entries moved in ' + div + ' Div Quizzer\n' + Style.RESET_ALL)
    summary_name = input(
        "How would you like to save the summary file (no .ods extension)?\n")
    while True:
        try:
            print("Saving.\n")
            pe.save_data(summary_name + ".ods", summary)
            break
        except Exception:
            print(Fore.RED +
                  "Saving failed, you may have a file of the same name open." +
                  Style.RESET_ALL)
            input("Press enter to try again.\n")
    print(Fore.GREEN + "Saving succeeded.\n" + Style.RESET_ALL)
    print("Returning to main.\n")
def do_draw():
    print("Doing draw.\n")
    div_data = get_all_divs()
    main_divs = div_data[0]
    sub_divs = div_data[1]
    connected_divs = div_data[2]
    data = get_parsed_data(main_divs, sub_divs, connected_divs)
    if input("Would you like to continue? (y/n):\n") == 'n':
        print("\nRestarting.\n")
        return
    print("Working.\n")
    print("Creating Draw data.\n")
    # Header row: an empty cell for the team name, three highlighted
    # Place/Score/Points/Errors groups, then seventeen plain ones (the same
    # 81 cells as the original literal list).
    header = ([""]
              + ["---Place---", "---Score---", "---Points---",
                 "---Errors---"] * 3
              + ["Place", "Score", "Points", "Errors"] * 17)
    draw = OrderedDict()
    for div in main_divs:
        draw.update({div + " Div": [list(header)]})
        print("Moving " + div + " Div.")
        count = 0
        for key in data.keys():
            quiz = data[key][1][1]
            if (get_div(quiz, connected_divs) == div
                    and quiz[-1] not in ('W', 'X', 'Y', 'Z')):
                for i in range(3):
                    team_name = data[key][i + 1][0]
                    index = get_team_index(draw[div + " Div"], team_name)
                    if index == -1:
                        # index -1 then refers to the row just appended
                        draw[div + " Div"].append([team_name])
                    for j in range(4):
                        draw[div + " Div"][index].append(
                            data[key][i + 1][j + 2])
                count = count + 1
        print(Fore.CYAN + (Style.BRIGHT if count != 0 else '') + str(count) +
              ' Entries moved in ' + div + ' Div\n' + Style.RESET_ALL)
    draw.update({"WXYZ": [list(header)]})
    print("Moving WXYZ.")
    count = 0
    for key in data.keys():
        quiz = data[key][1][1]
        if quiz[-1] in ('W', 'X', 'Y', 'Z'):
            for i in range(3):
                team_name = data[key][i + 1][0]
                index = get_team_index(draw["WXYZ"], team_name)
                if index == -1:
                    draw["WXYZ"].append([team_name])
                for j in range(4):
                    draw["WXYZ"][index].append(data[key][i + 1][j + 2])
            count = count + 1
    print(Fore.CYAN + (Style.BRIGHT if count != 0 else '') + str(count) +
          ' Entries moved in WXYZ\n' + Style.RESET_ALL)
    draw_name = input(
        "How would you like to save the draw file (no .ods extension)?\n")
    while True:
        try:
            print("Saving.\n")
            pe.save_data(draw_name + ".ods", draw)
            break
        except Exception:
            print(Fore.RED +
                  "Saving failed, you may have a file of the same name open." +
                  Style.RESET_ALL)
            input("Press enter to try again.\n")
    print(Fore.GREEN + "Saving succeeded.\n" + Style.RESET_ALL)
    print("Returning to main.\n")
# The loop header below is reconstructed from the fragment's `continue` and
# the three URLs it compares against; base_url, the *_list accumulators and
# dic_list are assumed to be initialised earlier in the script.
for url in (base_url + 'computers/laptops',
            base_url + 'computers/tablets/',
            base_url + 'phones/touch'):
    res = requests.get(url)
    if res.status_code != 200:
        print(f'Connection failed. Code : {res.status_code}')
        continue
    soup = BeautifulSoup(res.content, 'html.parser')
    title = soup.find_all('a', {'class': 'title'})
    price = soup.find_all('h4', {'class': 'price'})
    for j in range(len(title)):
        # Create 3 sheets: Laptops, Tablets, Phones
        if url == base_url + 'computers/laptops':
            lap_list.append((title[j].text, price[j].text))
        if url == base_url + 'computers/tablets/':
            tab_list.append((title[j].text, price[j].text))
        if url == base_url + 'phones/touch':
            phone_list.append((title[j].text, price[j].text))

dic_list['Laptops'] = lap_list
dic_list['Tablets'] = tab_list
dic_list['Phones'] = phone_list
filename = 'opti_scrap.ods'
save_data(filename, dic_list)
print(f'Your file {filename} has been created !')
def save_to_spreadsheet(file_name, data):
    pyexcel_ods3.save_data(file_name, data)
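# Usage sketch: `data` maps sheet names to rows of lists (values invented).
save_to_spreadsheet("report.ods", {"Summary": [["total", 42], ["mean", 3.5]]})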
def write_ods(self, filepath):
    pyexcel_ods3.save_data(filepath, self.sheets)
def aggregateData(exp_name, pre=''):
    pre = pre + '.aggregateData'
    log('started', pre=pre)
    # aggregate the SYMLOG metrics
    SYMLOG_metrics_path = os.path.join(SYMLOG_METRICS_DIR, exp_name,
                                       'SYMLOG_metrics.ods')
    symlog_data = read_data(SYMLOG_metrics_path)['SYMLOG']
    headers = symlog_data[0]
    metrics = symlog_data[0][1:]
    repo_index = symlog_data[0].index('repository')
    symlog_dict = {}
    for row in symlog_data[1:]:  # exclude the header
        reponame = row[repo_index].replace('__', '/')
        value = {metric: row[headers.index(metric)] for metric in metrics}
        symlog_dict[reponame] = value
    # aggregate the Big5 metrics
    Big5_metrics_path = os.path.join(BIG5_METRICS_DIR, exp_name,
                                     'Big5_metrics.ods')
    big5_data = read_data(Big5_metrics_path)['Big5']
    headers = big5_data[0]
    metrics = big5_data[0][1:]
    repo_index = big5_data[0].index('repository')
    big5_dict = {}
    for row in big5_data[1:]:  # exclude the header
        reponame = row[repo_index].replace('__', '/')
        value = {metric: row[headers.index(metric)] for metric in metrics}
        big5_dict[reponame] = value
    # aggregate the ESEM training data from ESEM_TRAIN_path
    esem_training_data = get_data(ESEM_TRAIN_path)['trainingset.csv']
    headers = esem_training_data[0]
    metrics = esem_training_data[0][1:]
    repo_index = headers.index('Repository')
    esem_train_dict = {}
    for row in esem_training_data[1:]:  # exclude the header
        reponame = row[repo_index]
        value = {metric: row[headers.index(metric)] for metric in metrics}
        # map the 'Failed' flag onto a project status, to be consistent with
        # the ESEM - final.ods spreadsheet
        value['Failed'] = ('inactive' if value['Failed'] == 'Yes'
                           else 'active' if value['Failed'] == 'No'
                           else 'Error')
        esem_train_dict[reponame] = value
    # aggregate the ESEM labels (folds data)
    esem_data = read_data(ESEM_path)
    esem_model = esem_data['Model']
    esem_prec = esem_data['Precision']
    esem_rec = esem_data['Recall']
    # remove empty lines
    esem_model = [x for x in esem_model if len(x) != 0]
    esem_prec = [x for x in esem_prec if len(x) != 0]
    esem_rec = [x for x in esem_rec if len(x) != 0]
    folds_dict = {}
    testing_dict = {}
    prec_dict = {}
    rec_dict = {}
    headers = esem_model[0]
    for row in esem_model[1:]:
        if len(row) != 0:
            name = row[headers.index('Repository')]
            status = row[headers.index('Archived')]
            # inactive covers both 'Archived' and 'FSE' (which the paper says
            # is inactive)
            active = 'active' if status == 'Active' else 'inactive'
            folds_dict[name] = {'status': active}
            log('processed', name, pre=pre)
        else:
            log('error, zero length row', row, pre=pre)
    headers = esem_prec[0]
    for row in esem_prec[1:]:
        if len(row) != 0:
            name = row[headers.index('Repository')]
            result = row[headers.index('Result')]
            if result not in ['TP', 'FP']:
                print('unknown result', result)
            status = ('inactive' if result == 'TP'
                      else 'active' if result == 'FP'
                      else 'ERROR unknown')
            prec_dict[name] = {'status': status}
    headers = esem_rec[0]
    for row in esem_rec[1:]:
        if len(row) != 0:
            name = row[headers.index('Repository')]
            classification = row[headers.index('Classification')]
            if classification not in ['TP', 'FN']:
                print('unknown classification', classification)
            status = ('inactive' if classification in ['TP', 'FN']
                      else 'ERROR unknown')
            rec_dict[name] = {'status': status}
    testing_dict.update(prec_dict)
    testing_dict.update(rec_dict)
    # do some data quality checks on the data
    print('Performing data quality check')
    train_repos = list(esem_train_dict.keys())
    print(len(train_repos), 'repos in trainingset.csv')
    esem_model_repos = list(folds_dict.keys())
    print(len(esem_model_repos), 'repos in ESEM - final.ods.model')
    # union of both lists; its size matches both only when they contain the
    # same repositories
    common = set(train_repos + esem_model_repos)
    print(len(common), 'repos common to both...',
          'GOOD' if len(common) == len(train_repos) == len(esem_model_repos)
          else 'BAD')
    agree_repos = [
        x for x in common
        if esem_train_dict[x]['Failed'] == folds_dict[x]['status']
    ]
    print(len(agree_repos), 'repos have the same status...',
          'GOOD' if len(agree_repos) == len(common) else 'BAD')
    print('Data quality check complete')
    # output the data
    out_data = OrderedDict()
    out_path = os.path.join(OUT_DIR, exp_name, 'experiment_data.ods')
    symlog_metrics = list(list(symlog_dict.values())[0].keys())
    big5_metrics = list(list(big5_dict.values())[0].keys())
    esem_train_metrics = list(list(esem_train_dict.values())[0].keys())
    print(len(esem_train_metrics), 'metrics in esem_train_metrics')
    # exclude the status column, which isn't actually one of the metrics
    esem_train_metrics = [x for x in esem_train_metrics if x != 'Failed']
    print(len(esem_train_metrics), 'ACTUAL metrics in esem_train_metrics')
    for label, d in zip(['Folds', 'Testing', 'Precision', 'Recall'],
                        [folds_dict, testing_dict, prec_dict, rec_dict]):
        # get the repos common to this dict and the symlog data
        common_repos = []
        symlog_only = []
        for key in d:
            if config_include_test_set:
                # symlog_dict includes ALL repositories, including those used
                # in ESEM's "empirical validation"
                if key in symlog_dict:
                    common_repos.append(key)
                else:
                    symlog_only.append(key)
            else:
                # i.e. exclude the testing sets for now
                if key in symlog_dict and key in folds_dict:
                    common_repos.append(key)
                else:
                    symlog_only.append(key)
        print(len(symlog_only), 'repos not in symlog_dict for', label)
        if not os.path.exists(os.path.join(OUT_DIR, exp_name)):
            os.mkdir(os.path.join(OUT_DIR, exp_name))
        header = ['reponame', 'esem_status', *symlog_metrics, *big5_metrics,
                  *esem_train_metrics]
        data = []
        for reponame in common_repos:
            esem_status = d[reponame]['status']
            symlog_scores = [symlog_dict[reponame][m] for m in symlog_metrics]
            big5_scores = [big5_dict[reponame][m] for m in big5_metrics]
            esem_train_scores = [esem_train_dict[reponame][m]
                                 for m in esem_train_metrics]
            data.append([reponame, esem_status] + symlog_scores + big5_scores
                        + esem_train_scores)
        out_data.update({label: [header, *data]})
    save_data(out_path, out_data)
    # output the metrics for each category of features used
    json.dump(symlog_metrics,
              open(os.path.join(OUT_DIR, exp_name, 'symlog_metrics.json'), 'w'),
              indent=4)
    json.dump(big5_metrics,
              open(os.path.join(OUT_DIR, exp_name, 'big5_metrics.json'), 'w'),
              indent=4)
    json.dump(esem_train_metrics,
              open(os.path.join(OUT_DIR, exp_name, 'esem_train_metrics.json'),
                   'w'),
              indent=4)
    print('done', exp_name)
category_name = category_link['href'].split('/')[-1]
category_link_html = requests.get(
    f"https://www.webscraper.io{category_link['href']}")
category_link_soup = BeautifulSoup(category_link_html.content, 'html.parser')
# iterate over the menu's subcategories
for sub_link in category_link_soup.findAll('a', {'class': 'subcategory-link'}):
    sub_link_name = sub_link['href'].split('/')[-1]
    sub_link_html = requests.get(
        f"https://www.webscraper.io{sub_link['href']}")
    sub_link_soup = BeautifulSoup(sub_link_html.content, 'html.parser')
    # iterate over the subcategory's items
    for item in sub_link_soup.findAll('div',
                                      {'class': 'col-sm-4 col-lg-4 col-md-4'}):
        item_title = item('a', {'class': 'title'})[0]['title']
        item_price = item('h4', {'class': 'pull-right price'})[0].text
        # item_description = item('p', {'class': 'description'})[0].text
        # collect the information to save in the ods file
        list_items.append(
            [category_name, sub_link_name, item_title, item_price])

# save in ods format
data_to_save = OrderedDict()
data_to_save.update({"Sheet 1": list_items})
save_data("scrap_result.ods", data_to_save)
# We found that all categories share the same base url
base_url = "https://www.webscraper.io/test-sites/e-commerce/allinone"
items = []  # assumed initialisation; the original fragment starts mid-script

for page_url in ["/computers/laptops", "/computers/tablets", "/phones/touch"]:
    # concatenate base_url with the page url
    url = f"{base_url}{page_url}"
    response = requests.get(url)
    if response.status_code != 200:
        print(f"Page not fetched correctly. Code {response.status_code}")
        continue
    soup = BeautifulSoup(response.content, 'html.parser')
    items_soup = soup.find_all('a', {'class': 'title'})
    for item_soup in items_soup:
        item = []
        item.append(page_url)
        name = item_soup['title']
        item.append(name)
        # the price sits in a sibling element two nodes back
        price_soup = item_soup.parent.previous_sibling.previous_sibling
        price = price_soup.string[1:]  # strip the currency symbol
        item.append(price)
        items.append(item)

now = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
filename = f"concurrence-{now}.ods"
save_data(filename, {'Feuille 1': items})
print(f'Ding ! Le fichier Spreadsheet est prêt ! ./{filename}')