def get_courses(doubleknot_roster):
    roster_file_type = _get_file_type(doubleknot_roster)
    # read the scouts/courses from the roster
    records = pyexcel.iget_records(file_type=roster_file_type,
                                   file_content=doubleknot_roster)
    courses = []
    for record in records:
        course_name = program_re.search(record['Description']).group(0)
        period_results = period_re.search(record['Description'])
        if period_results is not None:
            period = period_results.group('period')
        else:
            period = ''
        # start a new course entry whenever the (name, period) pair changes
        if (not courses or courses[-1]['name'] != course_name
                or courses[-1]['period'] != period):
            courses.append({'name': course_name, 'period': period})
    return courses
def create_courses(doubleknot_roster):
    """Takes a byte buffer representing a doubleknot roster and writes the
    courses and scouts to the Backsplice database. Supported formats are:
    .xls, .xlsx, and .ods"""
    # helper function to create a single course
    def create_course():
        # look for a course reference with the course name
        course_reference = CourseReference.objects.filter(
            name=course_name, period=period,
            year=str(datetime.date.today().year))
        # if there is a course reference
        if course_reference.exists():
            # create the last course
            course_reference = course_reference[0]
            course = Course.objects.create(
                week=1, start_date=start_date, end_date=end_date,
                course_reference=course_reference)
            # create the scouts and add them to the course
            for scout in scouts:
                last_name = scout['Last Name']
                first_name = scout['First Name']
                unit = troop_re.search(
                    scout['Group Name (Registration)']).group(0)
                course_scout = Scout.objects.create(
                    last_name=last_name, first_name=first_name, unit=unit)
                course_scout.save()
                course_scout.course_set.add(course)
            course.save()

    roster_file_type = _get_file_type(doubleknot_roster)
    # read the scouts/courses from the roster
    records = pyexcel.iget_records(file_type=roster_file_type,
                                   file_content=doubleknot_roster)
    scouts = []
    course_name = ''
    period = ''
    course = None
    course_reference = None
    start_date = datetime.date.today()
    end_date = start_date
    # for each scout
    for record in records:
        temp_course_name = program_re.search(record['Description']).group(0)
        period_re_results = period_re.search(record['Description'])
        temp_course_period = ''
        if period_re_results is not None:
            temp_course_period = period_re_results.group('period')
        if temp_course_name == course_name and temp_course_period == period:
            scouts.append(record)
        else:
            create_course()
            scouts = [record]
            course_name = temp_course_name
            period = temp_course_period
    create_course()
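# The two roster helpers above depend on module-level regexes defined
# elsewhere in the project. Purely illustrative stand-ins (assumptions,
# not the originals): period_re must expose a named group called
# 'period', and program_re/troop_re are read via .group(0).
import re

program_re = re.compile(r"^[^(]+")                     # course name prefix
period_re = re.compile(r"Period\s*(?P<period>\S+)")    # named 'period' group
troop_re = re.compile(r"Troop\s*\d+")                  # unit designation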
def import_taobaoke_excel():
    # materialize the generator; otherwise the print loop below would
    # exhaust it and the function would return an empty iterator
    records = list(pe.iget_records(file_name=TAOBAOKE_EXCEL_FILE))
    for record in records:
        print("%s is aged at %d" % (record['Name'], record['Age']))
    return records
def iget_records(self, **keywords):
    """Get a generator of a list of records from the file

    :param sheet_name: For an excel book, there could be multiple sheets.
                       If it is left unspecified, the sheet at index 0 is
                       loaded. For 'csv', 'tsv' files, *sheet_name* should
                       be None anyway.
    :param keywords: additional key words
    :returns: A generator of a list of records
    """
    params = self.get_params(**keywords)
    return pe.iget_records(**params)
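# A minimal usage sketch for iget_records-style access (the file name
# "example.xlsx" and sheet name "Sheet1" are assumptions): records are
# yielded lazily as {header: value} dicts, and pyexcel asks that
# free_resources() be called once the stream has been consumed.
import pyexcel

for record in pyexcel.iget_records(file_name="example.xlsx",
                                   sheet_name="Sheet1"):
    print(record)
pyexcel.free_resources()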
def test_get_records_from_irregular_array(self):
    data = [["X", "Y", "Z"], [1, 2, 3], [4, 5]]
    result = pe.iget_records(array=data)
    eq_(list(result), [
        {"X": 1, "Y": 2, "Z": 3},
        {"X": 4, "Y": 5, "Z": ''},
    ])
def _apply_rules(self, from_file, sheet_name=None, row_filter=None,
                 monitor_rules=False, **kwargs):
    """ Base rule application method.

    :param str from_file: The file to apply rules to
    :param str sheet_name: The name of the sheet to apply rules to
    :param callable row_filter: A callable which accepts a cleaned record
        and returns True if the record should be written out
    :param dict kwargs: Any named arguments, for the reading of the file
    :returns: Yields normalized records
    """
    if not callable(row_filter):
        row_filter = self.__row_filter
    for record in pyexcel.iget_records(file_name=from_file,
                                       sheet_name=sheet_name, **kwargs):
        if row_filter(record, normalized=False):
            # start application of all registered rules
            for (rule, rule_args, rule_kwargs) in self.rules:
                if monitor_rules and rule.__name__ not in self.__rule_stats:
                    self.__rule_stats[rule.__name__] = 0
                if rule in self.value_rules:
                    # value rules are required to pass filtering
                    for (column, value) in self._filter_values(
                            record, **rule_kwargs):
                        # handle application of value rule
                        record[column] = rule(self, record.copy(), column,
                                              *rule_args, **rule_kwargs)
                        if monitor_rules:
                            self.__rule_stats[rule.__name__] += 1
                else:
                    # handle application of record rule
                    record = rule(self, record.copy(),
                                  *rule_args, **rule_kwargs)
                    if monitor_rules:
                        self.__rule_stats[rule.__name__] += 1
            # row filtering done post record normalization
            if row_filter(record, normalized=True):
                yield record
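# A hedged usage sketch for _apply_rules above; RuleEngine, keep_adults,
# and "people.xlsx" are illustrative assumptions, not names from the
# original code. Note the row_filter is called twice per record: once on
# the raw record (normalized=False) and once after the rules have run
# (normalized=True).
def keep_adults(record, normalized=False):
    # accept only rows whose Age column parses to 18 or more
    try:
        return int(record.get("Age", 0)) >= 18
    except (TypeError, ValueError):
        return False

# engine = RuleEngine()  # hypothetical subclass wiring up self.rules
# for record in engine._apply_rules("people.xlsx", row_filter=keep_adults):
#     print(record)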
def max_width(sheet_name):
    data = dict()
    for record in pyexcel.iget_records(
            file_name='../user/HanziLevelUp.xlsx', sheet_name=sheet_name):
        for k, v in record.items():
            data.setdefault(k, []).append(len(str(v)))
    pyexcel.free_resources()
    for k, v in data.items():
        data[k] = max(v)
    return data
def filldata1():
    fs = FileSystemStorage('universityloanoffice/files')
    file_name = fs.path('') + '/' + 'data.xls'
    fl = pyexcel.iget_records(file_name=file_name)
    college = None
    for record in fl:
        # only create a college when the cell is non-empty and no record
        # with that name exists yet
        if record['College'] != "" and not College.objects.filter(
                college_name=record['College']):
            college = College(
                college_name=record['College'],
            )
            print(record['Course'])
            college.save()
    return True
def filldata2():
    fs = FileSystemStorage('universityloanoffice/files')
    file_name = fs.path('') + '/' + 'data.xls'
    fl = pyexcel.iget_records(file_name=file_name)
    degree = None
    for record in fl:
        if record['Course'] != "" and not Degree.objects.filter(
                programe_name=record['Course']):
            degree = Degree(
                programe_name=record['Course'],
                tuition_fee=1500000,
                college_name=College.objects.get(
                    college_name=record['College']),
            )
            degree.save()
    return True
def main(argv):
    path = argv[0]
    records = pe.iget_records(file_name=path)
    # for record in records:
    #     print("%s %s" % (record['name'], record['url']))
    jinjaEnv = Environment(loader=FileSystemLoader(searchpath="templates/"),
                           autoescape=select_autoescape(['html', 'xml']))
    template = jinjaEnv.get_template('prometheus.yml.j2')
    for record in records:
        content = template.render(record=record)
        print(content)
    # release pyexcel's file handles only after the generator is consumed
    pe.free_resources()
def load(self, filename: str) -> Iterator[Record]:
    records = pyexcel.iget_records(file_name=filename)
    errors = []
    try:
        for line_num, row in enumerate(records, start=1):
            try:
                yield self.from_row(filename, row, line_num)
            except FileParseException as err:
                errors.append(err)
    except pyexcel.exceptions.FileTypeNotSupported:
        message = 'This file type is not supported.'
        raise FileParseException(filename, line_num=-1, message=message)
    if errors:
        raise FileParseExceptions(errors)
def testRunTheCase(self):
    self.testcaseList = pyexcel.iget_records(
        file_name="测试案例集1-获取IP地址的地理信息.xlsx")
    print(self.testcaseList)
    for caseDict in self.testcaseList:
        time.sleep(3)
        with self.subTest(msg=caseDict["案例意图"]):
            result = self.getCityFromIP(caseDict["ip"])
            if isinstance(result, str):
                self.assertEqual(caseDict["expect"], result,
                                 caseDict["案例意图"])
            else:
                self.assertEqual(caseDict["expect"], result["city"],
                                 caseDict["案例意图"])
def test_create_question():
    question = dict()
    records = pexcel.iget_records(file_name="questions.xls")
    # records = pexcel.iget_records(file_name="ABC_questions.xlsx")
    for record in records:
        question['q_content'] = record['질문']
        question['q_emotion'] = record['감정분류']
        question['q_tag1'] = record['태그1']
        question['q_tag2'] = record['태그2']
        q = Question(question=json.dumps(question))
        # db.session.add(q)
        # db.session.commit()
        print(question)
def parse_xlsx(xlsx_filename):
    result = []
    hinfo = HistoricalInfo()
    records = pyexcel.iget_records(file_name=xlsx_filename)
    for r1 in list(records):
        tr_type = (TransactionType.Buy if r1["Type"].lower() == "buy"
                   else TransactionType.Sell)
        major, minor = MARKETS[r1["Market"]]
        dt = datetime.strptime(r1["Date"], "%Y-%m-%d %H:%M:%S")
        cad_rate = hinfo.get_cad_price(major, dt)
        ut = UniversalTransaction(Exchange.Binance, tr_type, dt, major,
                                  minor, float(r1["Amount"]),
                                  float(r1["Price"]), cad_rate)
        result.append(ut)
    return result
def load_test_data(self):
    records = None
    prop = self.config.load_properties_file()
    base_test_data = prop.get('RAFT', 'test_data')
    ui_file_path = os.path.join(
        self.cur_path, r"../TestData/{}.xlsx".format(base_test_data))
    try:
        if ui_file_path is not None:
            records = exc.iget_records(file_name=ui_file_path)
    except Exception as ex:
        # pass the exception through a %s placeholder so the logging call
        # formats it instead of discarding it
        self.log.error("Failed to load test data: %s", ex)
    return records
def test_get_records_from_file(self):
    data = [
        ["X", "Y", "Z"],
        [1, 2, 3],
        [4, 5, 6]
    ]
    sheet = pe.Sheet(data)
    testfile = "testfile.xls"
    sheet.save_as(testfile)
    result = pe.iget_records(file_name=testfile)
    eq_(list(result), [
        {"X": 1, "Y": 2, "Z": 3},
        {"X": 4, "Y": 5, "Z": 6}
    ])
    os.unlink(testfile)
def send_sms_from_excel(excel_file, msg_template=""):
    broadcasts_endpoint = apiv2_endpoint + "broadcasts.json"
    # excel_file is the remote samba file name
    obj = read_remote_samba_file(excel_file)
    if not obj:
        print("Failed to read file: {} from SAMBA server:".format(excel_file))
        return
    file_name = obj.name
    obj.close()
    records = pe.iget_records(file_name=file_name)
    for record in records:
        kws = {
            'name': record['Name'], 'Name': record['Name'],
            'results': record['Results'], 'Results': record['Results'],
            'result': record['Results'], 'Result': record['Results'],
            'labid': record['LabID'], 'LabID': record['LabID'],
            'date': record['Sample Date'], 'Date': record['Sample Date'],
        }
        message = Template(msg_template).safe_substitute(kws)
        telephone = format_msisdn(record["Telephone"])
        if not telephone:
            continue
        print("TO:{}, MSG:{}".format(telephone, message))
        params = {'urns': ["tel:{}".format(telephone)], 'text': message}
        post_data = json.dumps(params)
        try:
            requests.post(broadcasts_endpoint, post_data, headers={
                'Content-type': 'application/json',
                'Authorization': 'Token %s' % api_token
            })
            # print("Broadcast Response: ", resp.text)
        except requests.RequestException:
            print("ERROR Sending Broadcast")
    deleted = delete_remote_samba_file(excel_file)
    if deleted:
        print("Remote SAMBA file:{} successfully deleted".format(excel_file))
    pe.free_resources()
    os.unlink(file_name)
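# An illustrative msg_template for send_sms_from_excel above; the string
# itself is an assumption, but its $-placeholders must match the keys the
# function builds into kws (Name, Date, LabID, Result, ...):
example_template = "Dear $Name, your sample from $Date ($LabID): $Result"
# send_sms_from_excel("results.xlsx", msg_template=example_template)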
def load_sheet(file):
    m = re.search(r'.*(\.\w+)$', file.name)
    if not m:
        suffix = '.csv'
    else:
        suffix = m[1]
    if suffix in {'.csv', '.tsv'}:
        dialect = {'.csv': csv.excel, '.tsv': csv.excel_tab}[suffix]
        with file.open('r', encoding='utf8') as f:
            reader = csv.DictReader(f, dialect=dialect)
            return list(reader)
    else:
        import pyexcel
        # materialize the records so both branches return a list, and
        # release the file handles pyexcel keeps for its lazy reader
        records = list(pyexcel.iget_records(file_name=file.name))
        pyexcel.free_resources()
        return records
def load_test_data(self):
    records = None
    # noinspection PyBroadException
    prop = self.config.load_properties_file()
    base_test_data = prop.get('RAFT', 'base_test_data')
    ui_file_path = os.path.join(
        self.cur_path, r"../TestData/{}.xlsx".format(base_test_data))
    try:
        if ui_file_path is not None:
            records = exc.iget_records(file_name=ui_file_path)
    except Exception:
        # traceback.print_exc takes an optional line limit, not the
        # exception object, so it is called without arguments here
        traceback.print_exc()
    return records
def getDocumentObject():
    valarry = []
    cnt = 1
    records = pe.iget_records(file_name=inputfile)
    for row in records:
        outdict = {}
        outdict['@search.action'] = 'upload'
        if row[fields_map[0][0]]:
            outdict['Index'] = str(row['Index'])
            for (in_fld, out_fld) in fields_map:
                outdict[out_fld] = row[in_fld]
            valarry.append(outdict)
            cnt += 1
    return {'value': valarry}
def businno2businflag(base_dir):
    records = pyexcel.iget_records(
        file_name=os.path.join(base_dir, "busin2businflag.xlsx"))
    sqls = " "
    for record in records:
        splitstr = record["busin_flags"]
        if len(splitstr) <= 0:
            continue
        sql = ("insert into qs_tbusinflagtolic(busin_no, busin_flag) values("
               + str(record["busin_no"]) + ",")
        for businflag in record["busin_flags"].split(","):
            if not businflag.isdigit():
                continue
            sqls += sql + businflag + ");\ncommit;\n"
    print("=========\n%s" % sqls)
def load_excel_data(self):
    """
    This method is used for loading excel file data
    :return: it returns excel records
    """
    records = None
    # noinspection PyBroadException
    try:
        if self.file_path is not None:
            records = exc.iget_records(file_name=self.file_path)
    except Exception:
        traceback.print_exc()
    return records
def get_videos(info_file_path):
    if not os.path.exists(new_folder_name):
        os.mkdir(new_folder_name)
    names = dict()
    exist_count = dict()
    for i in pe.iget_records(file_name=info_file_path):
        name = str(i['description']).replace('/', '.')
        if name in exist_count:
            exist_count[name] += 1
            name += '_' + str(exist_count[name])
        else:
            exist_count[name] = 1
        names[i['video_url']] = name
    return names
def to_mongo(year):
    mongo = get_mongo_collection(year)
    filepath = './datasets/data20' + str(year) + '10cleaned.xls'
    if year == 16:
        filepath = './datasets/data20' + str(year) + '10cleaned.xlsx'
    column_names = {
        '13': [
            'Num', 'ClassNum', 'Sex', 'Nation', 'College', 'Department',
            'Major', 'Grade', 'Rxrq', 'Address', 'Height', 'Weight',
            'Kmrun', 'Pulmonary', 'Jump', 'Run', 'Arm', 'Chin',
            'LeftEye', 'RightEye'
        ],
        '14': [
            "GradeNum", "Major", "SchoolNum", "ClassNum", "Class", "Num",
            "NationNum", "Sex", "AddressNum", "Address", "Cancelled",
            "CancelledReason", "Height", "Weight", "Pulmonery", "Eighty",
            "Thousand", "Run", "Jump", "SitAndReach", "Situp", "Pullup"
        ],
        '15': [
            'GradeNum', 'ClassNum', 'Num', 'Sex', 'Height', 'Weight',
            'BMI', 'Pulmonery', 'Run', 'Jump', 'SitAndReach', 'Eighty',
            'Thousand', 'Situp', 'Pullup'
        ],
        '16': [
            'SchoolNum', 'GradeNum', 'ClassNum', 'Class', 'CardNum',
            'Num', 'NationNum', 'Sex', 'AddressNum', 'Address',
            'Cancelled', 'CancelledReason', 'Height', 'Weight',
            'SitAndReach', 'Situp', 'Pulmonary', 'Run', 'Jump', 'Pullup',
            'Eighty', 'Thousand'
        ]
    }
    '''
    data = get_data(filepath)
    json_str = json.dumps(data, ensure_ascii=False)
    data_dict = json.loads(json_str, encoding='utf-8')
    '''
    records = p.iget_records(file_name=filepath)
    total_num = {'13': 10825, '14': 3474, '15': 15410, '16': 15414}
    for record in records:
        num = record['Num']
        if mongo.count({'Num': num}) == 0:
            traits = dict()
            for column in column_names[str(year)]:
                traits[column] = record[column]
            mongo.insert_one(traits)
        print('Progress: {:.2%}'.format(mongo.count() / total_num[str(year)]))
def update_status(request, file_name, reg_no):
    fn = file_name
    fs = FileSystemStorage('loanboard/files')
    file_name = fs.path('') + '/' + file_name
    fl = pyexcel.iget_records(file_name=file_name)
    loanbeneficiary = Beneficiary.objects.all()
    if reg_no == "all":
        for std in fl:
            bf = Beneficiary.objects.get(
                form_four_index_no=std['Form 4 Index Number'])
            if std['Status'] != bf.status:
                bf.status = std['Status']
                bf.save()
    template_data = {
        'beneficiaries': loanbeneficiary,
    }
    return redirect('/loanboard/view-changes/' + fn)
def set_question_api(q_name):
    # records = pexcel.iget_records(file_name="questions.xls")
    records = pexcel.iget_records(file_name="./tests/" + q_name + ".xls")
    for record in records:
        question = dict()
        question['q_content'] = record['질문']
        question['q_emotion'] = record['감정분류']
        question['q_tag1'] = record['태그1']
        question['q_tag2'] = record['태그2']
        q = Question(question=json.dumps(question))
        db.session.add(q)
        db.session.commit()
    qs = Question.query.all()  # Flask-SQLAlchemy's query is a property
    return jsonify({'code': 200, 'count': len(qs)})
def getDocumentObjectByChunk(start, end):
    valarry = []
    cnt = 1
    records = pe.iget_records(file_name=inputfile)
    for i, row in enumerate(records):
        if start <= i < end:
            outdict = {}
            outdict['@search.action'] = 'upload'
            if row[fields_map[0][0]]:
                outdict['Index'] = str(row['Index'])
                for (in_fld, out_fld) in fields_map:
                    outdict[out_fld] = row[in_fld]
                valarry.append(outdict)
                cnt += 1
    return {'value': valarry}
def view_changes(request, file_name):
    fn = file_name
    fs = FileSystemStorage('loanboard/files')
    file_name = fs.path('') + '/' + file_name
    fl = pyexcel.iget_records(file_name=file_name)
    list_to_be_updated = []
    for std in fl:
        try:
            if std['Status'] != Beneficiary.objects.get(
                    form_four_index_no=std['Form 4 Index Number']).status:
                list_to_be_updated.append((
                    Beneficiary.objects.get(
                        reg_no=std['Registration Number']),
                    std['Status'],
                ))
        except Exception:
            continue
    list_to_be_updated = paginate(list_to_be_updated, 13, request)
    return render(request, 'updates.html',
                  {'beneficiaries': list_to_be_updated, 'file_name': fn})
def update_papercut(opaque):
    with flask_app.app_context():
        try:
            update_students = msettings.get_configuration_setting(
                'papercut-update-students')
            if update_students:
                username = msettings.get_configuration_setting(
                    'papercut-login')
                password = msettings.get_configuration_setting(
                    'papercut-password')
                url = msettings.get_configuration_setting('papercut-url')
                remote_file = msettings.get_configuration_setting(
                    'papercut-file').replace('\\\\', '/')
                filename = Path(remote_file).name
                local_file = f'{PAPERCUT_LOCATION}/{filename}'
                local_temp_file = f'{PAPERCUT_LOCATION}/temp.xlsm'
                ssh_client.connect(url, username=username, password=password)
                transport = ssh_client.get_transport()
                sftp = sftp_client.SFTPClient.from_transport(transport)
                sftp.get(remote_file, local_temp_file)
                sftp.close()
                ssh_client.close()
                try:
                    file_updated = not filecmp.cmp(local_file,
                                                   local_temp_file)
                except FileNotFoundError:
                    file_updated = True
                if file_updated:
                    students = mperson.get_persons(role=mperson.ROLE.STUDENT)
                    students_cache = {s.ss_internal_nbr: s for s in students}
                    lines = pyexcel.iget_records(file_name=local_temp_file)
                    nbr_updated_students = 0
                    for line in lines:
                        if str(line[HEADING_STUDENT_ID]) in students_cache:
                            student = students_cache[
                                str(line[HEADING_STUDENT_ID])]
                            if student.rfid_code != line[HEADING_BADGE]:
                                student.rfid_code = line[HEADING_BADGE]
                                mperson.update_flag(student, True)
                                nbr_updated_students += 1
                    mperson.end_update_bulk_person()
                    log.info(
                        f'papercut students: updated: '
                        f'{nbr_updated_students}/')
                    shutil.copyfile(local_temp_file, local_file)
        except Exception as e:
            log.error(f'papercut job task: {e}')
def uploadDocumentsInChunks(chunksize):
    records = pe.iget_records(file_name=inputfile)
    cnt = 0
    for row in records:
        cnt += 1
    for chunk in range(int(cnt / chunksize) + 1):
        print('Processing chunk number %d ...' % chunk)
        start = chunk * chunksize
        end = start + chunksize
        documents = json.dumps(getDocumentObjectByChunk(start, end))
        servicePath = ('/indexes/' + indexName + '/docs/index?api-version='
                       + apiVersion)
        r = postMethod(servicePath, documents)
        if r.status_code == 200:
            print('Success: %s' % r)
        else:
            print('Failure: %s' % r.text)
    return
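# The chunked upload above re-opens and re-reads the whole spreadsheet once
# per chunk (via getDocumentObjectByChunk). A single-pass alternative is
# sketched below under the assumption that the same inputfile is used;
# iter_chunks is an illustrative name, not from the original code.
import itertools
import pyexcel as pe

def iter_chunks(file_name, chunksize):
    # yield successive lists of at most chunksize records in one pass
    records = pe.iget_records(file_name=file_name)
    while True:
        chunk = list(itertools.islice(records, chunksize))
        if not chunk:
            break
        yield chunk
    pe.free_resources()

# for batch in iter_chunks(inputfile, 500):
#     ...build and post one payload per batch...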
def parse_excel_to_save():
    records = pe.iget_records(file_name="lncRNA-miRNA-mRNA_assciations.xlsx")
    for record in records:
        try:
            collection.insert_one({
                'PubMed_ID': record['PubMed ID'],
                'Journal': record['Journal'],
                'Title': record['Title'],
                'Year': record['Year'],
                'Gene': record['Gene'],
                'Gene_ID': record['Gene ID (All)'],
                'LncRNA': record['LncRNA'],
                'Disease_Tissue': record['Disease/Tissue'],
                'MiRNA': record['MiRNA'],
                'Pathway_Name': record['Pathway Name'],
            })
        except Exception:
            # avoid a bare except so KeyboardInterrupt is not swallowed
            print('exception')
def test_issue_95_preserve_custom_order_in_iget_orders():
    test_data = [["a", "b", "c"], ["1", "2", "3"], ["4", "5", "6"]]
    records = p.iget_records(array=test_data, custom_headers=["c", "a", "b"])
    result = []
    for record in records:
        for key, value in record.items():
            result.append([key, value])
    expected = [
        ["c", "3"],
        ["a", "1"],
        ["b", "2"],
        ["c", "6"],
        ["a", "4"],
        ["b", "5"],
    ]
    eq_(result, expected)
def load_test_data(self):
    """
    This method is used for loading excel file data for UI cases
    :return: it returns excel records
    """
    records = None
    # noinspection PyBroadException
    prop = self.config.load_properties_file()
    base_test_data = prop.get('RAFT', 'base_test_data')
    ui_file_path = os.path.join(
        self.cur_path, r"../TestData/{}.xlsx".format(base_test_data))
    try:
        if ui_file_path is not None:
            records = exc.iget_records(file_name=ui_file_path)
    except Exception as ex:
        self.log.error("Failed to load test data: %s", ex)
    return records