Example #1
def getFlightData(flightnum, day, path):
    """
        Function: getFlightData
        -------------------
        converts the CSV data created by the HTML Scraper Tool into readable Excel files:
            first sheet contains actual flight path data
            second sheet contains waypoint data

        flightnum: name of the flight number and airline (DAL1929)
        day: date of flight - 8 characters (yyyymmdd)
        path: location of the stored CSV files and where the xlsx will be saved

        returns: name of the created xlsx
    """
    
    from pyexcel.cookbook import merge_all_to_a_book
    import pyexcel.ext.xlsx  # no longer required if you use pyexcel >= 0.2.2

    fileID = path  #path name of data

    merge_all_to_a_book([fileID + flightnum + day + ".csv",
                         fileID + flightnum + day + "waypoints" + ".csv"],
                        flightnum + day + ".xlsx")  # creates the Excel file
    #Sheet 1: tracker points, Sheet 2: Waypoints

    return str(flightnum+day+'.xlsx')
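A quick usage sketch (the flight number, date, and path below are made-up placeholder values):

flight_xlsx = getFlightData("DAL1929", "20180101", "C:/flightdata/")
print(flight_xlsx)  # DAL192920180101.xlsx; note the workbook is written to the current working directory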
Example #2
def merge_json_files(args):
    try:
        os.makedirs(args.output)
    except OSError:
        pass
    if args.session_name is not None:
        files_list = glob.glob("{}/*/{}/*.json".format(args.directory, args.session_name))
    else:
        files_list = glob.glob("{}/*/*.json".format(args.directory))
    if len(files_list) == 0:
        print("No file loaded, make sure you have entered correct parameters")
        exit(0)
    registered_services = get_all_services(files_list, args)
    for service in registered_services:
        try:
            # drop duplicate entries for each service
            registered_services[service] = [
                i for n, i in enumerate(registered_services[service])
                if i not in registered_services[service][n + 1:]
            ]
        except TypeError as err:
            print("Skipping duplicate-entry removal for {} (err: {}) ...".format(service, str(err)),
                  file=stderr)
        if len(service + ".csv") > 31:
            # Excel caps sheet names at 31 characters, so trim long service names
            if args.verbose > 0:
                print("Service", service, "is", len(service + ".csv"),
                      "characters long; changing the name to:",
                      service[len(service + ".csv") - 31:])
            csv_convert.convert_file(
                registered_services[service],
                "{}/{}.csv".format(args.output, service[len(service + ".csv") - 31:]))
        else:
            csv_convert.convert_file(registered_services[service], "{}/{}.csv".format(args.output, service))
    merge_all_to_a_book(glob.glob("{}/*.csv".format(args.output)), "{}/Listing_resources.xlsx".format(args.output))
Example #3
def plot_peaks_troughs(inst_name):
    df = pd.read_csv(inst_name + ".csv")
    merge_all_to_a_book(glob.glob(inst_name + ".csv"), inst_name + ".xlsx")
    df_new = pd.read_excel(inst_name + ".xlsx")
    for i in range(len(df_new)):
        date = df.loc[i, "Date"]
        date_upd = date.split("+")[0]

        df.loc[i, "Date"] = date_upd
        df_new.loc[i, "Date_Time"] = datetime.strptime(df.loc[i, "Date"],
                                                       '%Y-%m-%d %H:%M:%S')

    df_new = df_new.tail(90)
    x = np.array(df_new["Date_Time"].tolist())
    x = [i.strftime('%Y-%m-%d %H:%M:%S') for i in x]
    df_new["Date"] = x
    peak90, peak90time, trough90, trough90time = plot_and_return_peak_trough(
        df_new, inst_name, 90)
    peak30, peak30time, trough30, trough30time = plot_and_return_peak_trough(
        df_new.tail(30), inst_name, 30)

    peak90df = pd.DataFrame({"Date": peak90time, "Peak": peak90})
    trough90df = pd.DataFrame({"Date": trough90time, "Trough": trough90})
    peak30df = pd.DataFrame({"Date": peak30time, "Peak": peak30})
    trough30df = pd.DataFrame({"Date": trough30time, "Trough": trough30})
    peak90df.to_csv(inst_name + "peak90" + ".csv")
    trough90df.to_csv(inst_name + "trough90" + ".csv")
    peak30df.to_csv(inst_name + "peak30" + ".csv")
    trough30df.to_csv(inst_name + "trough30" + ".csv")
Example #4
    def __init__(self, transaction_spreadsheet, workbook):
        self.path = workbook
        self.workbook_dest = load_workbook(workbook)
        self.ws_dest = self.workbook_dest['Raw Data']

        merge_all_to_a_book(glob.glob("{}/*.csv".format("Transactions")), "{}/output.xlsx".format("Transactions"))
        workbook_source = load_workbook('Transactions/output.xlsx')
        self.ws_source = workbook_source.active
Example #5
def plot_peaks_troughs(inst_name, flag):
    df = pd.read_csv(inst_name + ".csv")
    merge_all_to_a_book(glob.glob(inst_name + ".csv"), inst_name + ".xlsx")
    df_new = pd.read_excel(inst_name + ".xlsx")
    for i in range(len(df_new)):
        date = df.loc[i, "Date"]
        date_upd = date.split("+")[0]

        df.loc[i, "Date"] = date_upd
        df_new.loc[i, "Date_Time"] = datetime.strptime(df.loc[i, "Date"],
                                                       '%Y-%m-%d %H:%M:%S')

    df_new = df_new.tail(90)  # get the last 90 samples
    x = np.array(df_new["Date_Time"].tolist())
    x = [i.strftime('%Y-%m-%d %H:%M:%S') for i in x]
    print("instrument name", inst_name)

    df_new["Date"] = x
    peak90, peak90time, trough90, trough90time = plot_and_return_peak_trough(
        df_new, inst_name, 90, flag)
    peak30, peak30time, trough30, trough30time = plot_and_return_peak_trough(
        df_new.tail(30), inst_name, 30, flag)

    peak90df = pd.DataFrame({"Date": peak90time, "Peak": peak90})
    trough90df = pd.DataFrame({"Date": trough90time, "Trough": trough90})
    peak30df = pd.DataFrame({"Date": peak30time, "Peak": peak30})
    trough30df = pd.DataFrame({"Date": trough30time, "Trough": trough30})
    if flag:
        peak90df.to_csv("man_select_inst" + "\\" + inst_name + "peak90" +
                        ".csv")
        trough90df.to_csv("man_select_inst" + "\\" + inst_name + "trough90" +
                          ".csv")
        peak30df.to_csv("man_select_inst" + "\\" + inst_name + "peak30" +
                        ".csv")
        trough30df.to_csv("man_select_inst" + "\\" + inst_name + "trough30" +
                          ".csv")
    else:
        peak90df.to_csv("rule_select_inst" + "\\" + inst_name + "peak90" +
                        ".csv")
        trough90df.to_csv("rule_select_inst" + "\\" + inst_name + "trough90" +
                          ".csv")
        peak30df.to_csv("rule_select_inst" + "\\" + inst_name + "peak30" +
                        ".csv")
        trough30df.to_csv("rule_select_inst" + "\\" + inst_name + "trough30" +
                          ".csv")
Example #6
def xls_write(lexem_hash, identifier_hash, const_hash):
    with open('csv\\lexems.csv', 'w', newline='') as writeFile:  # newline='' avoids blank rows on Windows
        writer = csv.writer(writeFile)
        writer.writerows(array_lexems)
        writer.writerows(lexem_hash)
    with open('csv\\identifiers.csv', 'w', newline='') as writeFile:
        writer = csv.writer(writeFile)
        writer.writerows(array_identifiers)
        writer.writerows(identifier_hash)
    with open('csv\\consts.csv', 'w', newline='') as writeFile:
        writer = csv.writer(writeFile)
        writer.writerows(array_consts)
        writer.writerows(const_hash)
    merge_all_to_a_book(glob.glob("csv\\lexems.csv"), "xls\\lexems.xlsx")
    merge_all_to_a_book(glob.glob("csv\\identifiers.csv"),
                        "xls\\identifiers.xlsx")
    merge_all_to_a_book(glob.glob("csv\\consts.csv"), "xls\\consts.xlsx")
Example #7
# -*- coding: utf-8 -*-
import pyexcel.cookbook as pc
import sys
import time

# Print a message that the job is starting.
print("Process Start")

# Record the start time.
start_time = time.time()

# Read arguments from the terminal:
# the name of the CSV file to convert to Excel.
input_file = sys.argv[1]
# The name under which to save the merged result file.
result_file = sys.argv[2]

# Merge everything into a single Excel workbook.
# This function is provided by the library out of the box.
pc.merge_all_to_a_book([input_file], result_file)

# Print a message that the job is done.
print("Process Done.")

# Print how many seconds the job took.
end_time = time.time()
print("The Job Took " + str(end_time - start_time) + " seconds.")
Example #8
#Gael Blanchard
#Basic Data Wrangling with Python
#Data: World Happiness Report from Kaggle.com
#Required Libraries
from pyexcel.cookbook import merge_all_to_a_book
import pyexcel.ext.xlsx  # no longer required if you use pyexcel >= 0.2.2
import glob
import xlrd
from xlrd.sheet import ctype_text
import agate
import agatestats
import numpy
import matplotlib.pyplot as plt

# We will combine all our CSVs into one workbook, which we will then use for our data wrangling
merge_all_to_a_book(glob.glob("/path/to/worldhappiness/reportfolder/*.csv"),"output.xlsx")
#uses our created workbook
workbook = xlrd.open_workbook("output.xlsx")
#Test:
#print(workbook.nsheets)
#print(workbook.sheet_names())

# selects which sheet we want to use. Corresponds to 2015.csv
sheet = workbook.sheets()[0]

#Test:
#print(sheet.nrows)
#sheet.row_values(0)

#for row in range(sheet.nrows):
#	print(row, sheet.row(row))
Example #9
# Create a fake site list...
siteList = ['export']
idsite = 0

# Create a CSV for each site
for site in siteList:
    listCase = GetCases(thehive_api_url, thehive_key)
    fileName = site + '.csv'
    PutCasesOnFile(fileName, listCase)
    idsite += 1

# Create an xlsx for each site
for site in siteList:
    sitecsv = site + '.csv'
    sitexlsx = site + '.xlsx'
    merge_all_to_a_book(glob.glob(sitecsv), sitexlsx)
    sheet = pyexcel.get_sheet(file_name=sitexlsx)
    dataList = csvToList()
    rawHeaders = dataList[0]
    theHeaders = []
    for item in rawHeaders:
        theHeaders.append({'header': item})
    del dataList[0]
    colCount = len(list(sheet.columns()))
    colName = colnum_string(colCount)
    rowCount = len(list(sheet.rows())) - 1
    tableDelimiters = 'A1:' + str(colName) + str(rowCount)
    workbook = xlsxwriter.Workbook(sitexlsx)
    worksheet1 = workbook.add_worksheet(sitecsv)
    worksheet1.add_table(tableDelimiters, {'data': dataList, 'columns': theHeaders})
    workbook.close()
Example #10
def csvMerger(your_csv_directory):
    """
    Merges all CSVs into one big Excel workbook.
    """
    # the glob pattern must match the CSV files so the merge picks them up
    merge_all_to_a_book(glob.glob(os.path.join(your_csv_directory, "*.csv")),
                        "LI_DATA_ALL.xlsx")
Example #11
#!/usr/bin/python3
import glob

from pyexcel.cookbook import merge_all_to_a_book

if __name__ == "__main__":
    merge_all_to_a_book(
        sorted(
            glob.glob("/home/rafael/Temp/rev-saude/por_ano/t2/classes/*.csv")),
        "/home/rafael/Temp/rev-saude/por_ano/t2/rev-sau-50.xlsx")
Example #12
def yahoo_write(tckr, sd, sm, sy, ed, em, ey):
    # set the ticker value from the file, strip and make everything into uppercase
    ticker = tckr.upper().strip()
    m1 = str(sm).strip()
    d1 = str(sd).strip()
    y1 = str(sy).strip()

    # convert the start and end date components from the render into stripped strings
    m2 = str(em).strip()
    d2 = str(ed).strip()
    y2 = str(ey).strip()
    startdate = str(m1 + "/" + d1 + "/" + y1)
    enddate = str(m2 + "/" + d2 + "/" + y2)

    print(" start date is %s and type is %s " % (startdate, type(startdate)))
    print("end date is %s and type is %s " % (enddate, type(enddate)))
    print("ticker is %s and type is %s" % (ticker, type(ticker)))

    # The timestamps for startdate and enddate are the numeric (Unix epoch)
    # representation of the day, month and year.
    timestamp_startdate = int(
        time.mktime(
            datetime.datetime.strptime(startdate, "%m/%d/%Y").timetuple()))
    timestamp_enddate = int(
        time.mktime(
            datetime.datetime.strptime(enddate, "%m/%d/%Y").timetuple()))
    timestamp_difference = int(timestamp_enddate) - int(timestamp_startdate)
    actual_end = (timestamp_enddate)
    actual_start = (timestamp_startdate)

    print("start time is ", int(timestamp_startdate))
    print("end time is ", int(timestamp_enddate))
    print("difference in timestamp is ",
          ((timestamp_enddate) - (timestamp_startdate)))
    # One Yahoo history page spans 10540800 seconds (about 122 days);
    # a single day is 86400 seconds.
    step = int(10540800)
    table_complete = []

    pool_input_list = []
    pool_input_tuple = ()
    j = 0
    # Walk backwards from the last date, one page worth of timestamps at a time.
    for i in range(actual_start, actual_end, step):
        timestamp_startdate = timestamp_enddate - 10540800
        if (timestamp_startdate <= actual_start):
            timestamp_startdate = actual_start
        # The ticker is the 2-4 letter company symbol, unique per product;
        # change it to fetch the values of other products.
        url_page = "https://finance.yahoo.com/quote/" + ticker + "/history?period1=" + str(
            timestamp_startdate) + "&period2=" + str(
                timestamp_enddate) + "&interval=1d&filter=history&frequency=1d"

        # Creates a list of URLs, one for each page. We can only do this if we get the total no. of pages in the previous
        #  step.
        pool_input_list.append([[j, url_page]])
        timestamp_enddate = timestamp_startdate - 86400
        j = j + 1
    # All the pages are then appended into a list in the previous step and converted into a tuple.
    pool_input_tuple = tuple(pool_input_list)
    print(pool_input_tuple)

    # Start the multiprocessing pool with 4 worker processes; the URLs and
    # their indices are passed as input.
    p = multiprocessing.Pool(processes=4)
    p.map(parsing_yahoo, pool_input_tuple)

    # Combine the per-page Excel files into a single workbook.
    merge_all_to_a_book(
        glob.glob("C:/Users/vamshi/Desktop/DATA_EXTRACTION/yahoo/" +
                  str(ticker) + "/*.xlsx"),
        "C:/Users/vamshi/Desktop/DATA_EXTRACTION/yahoo/" + str(ticker) +
        "/Yahoo Data combined.xlsx")

    # All the per-page text files are accessed and combined into a single file.
    rd = glob.glob("C:/Users/vamshi/Desktop/DATA_EXTRACTION/yahoo/" +
                   str(ticker) + "/*.txt")
    with open(
            "C:/Users/vamshi/Desktop/DATA_EXTRACTION/yahoo/" + str(ticker) +
            "/Yahoo Data combined.txt", "wb") as outfile:
        for f in rd:
            with open(f, "rb") as infile:
                outfile.write(infile.read())

    # Open the combined file and yield its lines so the output can be displayed.
    file = open("C:/Users/vamshi/Desktop/DATA_EXTRACTION/yahoo/" +
                str(ticker) + "/Yahoo Data combined.txt")
    lines = file.readlines()
    for line in lines:
        yield line
    file.close()
Example #13
def getConvert():
	merge_all_to_a_book(glob.glob("data/publicacoes_tudo/tudo_all.csv"), "data/publicacoes_tudo/tudo_all.xlsx")
	merge_all_to_a_book(glob.glob("data/autores/autores_juncao/fullname_all.csv"), "data/autores/autores_juncao/fullname_all.xlsx")
	merge_all_to_a_book(glob.glob("data/atuacoes/atuacoes_juncao/atuacoes_all.csv"), "data/atuacoes/atuacoes_juncao/atuacoes_all.xlsx")
	print ("Conversão para geração dos grafos feita com sucesso")
	print("------------------")
Example #14
    surname = sdg.askstring(key, 'Podaj nazwisko')  # Polish prompt: "Enter surname"
    hangers_dict[key].update({'Imie': name, 'Nazwisko': surname})

print(hangers_dict)

#testing
from collections import Counter
x = Counter(lines)
for i in x.keys():
    if x[i] != hangers_dict[i[0:2]][i[2:]]:
        print('Error')

#excel time!
import pandas as pd
import sqlite3
import time
now = time.strftime('%Y-%m-%d')

df = pd.DataFrame.from_dict(hangers_dict, orient='index')
df = df[[
    'Nazwisko', 'Imie', 'ADULT', 'CLIP', 'JACKET', 'KIDS', 'KNIT', 'SCRAP'
]]
df.to_csv('final.csv')
conn = sqlite3.connect("hangers_sortation_database")
df.to_sql(now, conn, if_exists='append', index=False)

from pyexcel.cookbook import merge_all_to_a_book
import glob

merge_all_to_a_book(glob.glob("final.csv"), "output.xlsx")
Example #15
from pyexcel.cookbook import merge_all_to_a_book
# import pyexcel.ext.xlsx # no longer required if you use pyexcel >= 0.2.2
import glob

merge_all_to_a_book(glob.glob("data.csv"), "data.xlsx")
Example #16
from pyexcel.cookbook import merge_all_to_a_book
import glob

merge_all_to_a_book(glob.glob("./sample.csv"), "output.xlsx")
Example #17
csv_list = []
for each_key in list(runtimes_dict.keys()):
    rows_per_type = len(runtimes_dict.get(each_key)[0])
    for i in range(rows_per_type):
        temp_dict = {
            column_headers[0]: each_key,
            column_headers[1]: runtimes_dict.get(each_key)[0][i],
            column_headers[2]: runtimes_dict.get(each_key)[1][i]
        }
        csv_list.append(temp_dict)

currentPath = os.getcwd()
csv_file = currentPath + "/runtimes.csv"

prGreen("Converting CSV File to a Microsoft Excel Spreadsheet...\n")
with open(csv_file, 'w', newline='') as csvfile:  # newline='' avoids blank rows on Windows
    writer = csv.DictWriter(csvfile, fieldnames=column_headers)
    writer.writeheader()
    for data in csv_list:
        writer.writerow(data)
merge_all_to_a_book(glob.glob(csv_file), "runtimes.xlsx")

os.remove(csv_file)

prGreen("Importing Excel Results to Pandas Dataframe for Console Output...\n")
ms_excel_path = 'runtimes.xlsx'
excel_file = pd.ExcelFile(ms_excel_path)
sheet1_name = excel_file.sheet_names[0]  # take the first sheet name, not the whole list
xls_dataframe = excel_file.parse(sheet1_name)
prYellow(xls_dataframe)
Example #18
import csv
import openpyxl
from pprint import pprint

# openpyxl cannot open CSV files, so read the CSV with the csv module instead
with open("./melon_top_100.csv", newline='', encoding='utf-8') as f:
    data = [[row[0], row[1], row[3]] for row in csv.reader(f)]

#del data[0]    # remove the header row

data = sorted(data, key=lambda x: x[2], reverse=True)
pprint(data)

# write the sorted rows into a new xlsx workbook
book = openpyxl.Workbook()
sheet = book.active
for row in data:
    sheet.append(row)
book.save("./Melon_top_100.xlsx")

from pyexcel.cookbook import merge_all_to_a_book
# import pyexcel.ext.xlsx # no longer required if you use pyexcel >= 0.2.2 
import glob


merge_all_to_a_book(glob.glob("your_csv_directory/*.csv"), "output.xlsx")
Example #19
from pyexcel.cookbook import merge_all_to_a_book
import glob

merge_all_to_a_book(glob.glob("firstscrapy/*.csv"),
                    "firstscrapy/tripRestaurants.xlsx")
Example #20
            "rollup": 3600000,
            "fillTimeSeries": "true",
            "snapshotId": items['snapshotId']
        }
        headers = {
            'authorization': conn['auth'],
        }

        cpuUsage = requests.request("GET",
                                    url,
                                    headers=headers,
                                    params=paramsCPU,
                                    verify=False).json()
        memUsage = requests.request("GET",
                                    url,
                                    headers=headers,
                                    params=paramsMem,
                                    verify=False).json()

        for x in range(len(cpuUsage['values'])):
            line[0] = datetime.fromtimestamp(
                cpuUsage['values'][x]['timestamp'] / 1000)
            line[4] = cpuUsage['values'][x]['value']
            line[5] = memUsage['values'][x]['value']
            thewriter.writerow(line)

from pyexcel.cookbook import merge_all_to_a_book
# import pyexcel.ext.xlsx # no longer required if you use pyexcel >= 0.2.2
import glob

merge_all_to_a_book(["nodes.csv", "pods.csv"], "output.xlsx")
Example #21
driver = webdriver.Chrome("C:\\Python\\selenium\\chrome\\chromedriver.exe")
for x in range(0, len(links)):
	driver.get(main_data + links[x])
	sleep(3)
	tempRank = []
	
	# Get ranks for each player in a position
	rows = driver.find_elements_by_xpath("//*[@class!='inline-table']/tbody/tr")
	for y in range(1, len(rows)):
		name = driver.find_element_by_xpath("//*[@class!='inline-table']/tbody/tr["+str(y)+"]/td/a").text
		score = driver.find_element_by_xpath("//*[@class!='inline-table']/tbody/tr["+str(y)+"]/td[8]").text
		tempRank.append([y, name, score])

	ranks.append(tempRank)


# Prints players into csv file
with open(new_file + ".csv", 'w', newline='') as myfile:
	wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
	
	for x in range(0, len(ranks)):
		wr.writerow([positions[x] + " Rankings: Week " + str(week)])
		wr.writerow(["Rank", "Name", "Analyst Avg Rank"])
		wr.writerows(ranks[x])
		wr.writerow([])
	
merge_all_to_a_book(glob.glob(new_file + ".csv"), new_file + ".xlsx")	
os.remove(new_file + ".csv")

# Close browser
driver.quit()
Example #22
# Remove all remaining spaces in all headers found exclusively in datacolumns
for idx in range(len(stringkeys)):
	datacolumns[idx][0]= datacolumns[idx][0].replace(' ','')

# Initialize the output file and a variable that contains all columns
resultFile = open('out.csv', 'w', newline='')
wr = csv.writer(resultFile, dialect='excel')
all_columns = [phone_numbers] + datacolumns

# Writing routine that writes to CSV  
for row_idx in range(num_rows):
  # The following three lines could be a list comprehension: row_to_write = [column[row_idx] for column in all_columns]
  row_to_write = []
  for column in all_columns:
    row_to_write.append(column[row_idx])
  
  wr.writerow(row_to_write)

# Close the CSV so buffered rows are flushed, then convert it to xlsx
resultFile.close()
merge_all_to_a_book(glob.glob("out.csv"), fileout)

# Remove the out.csv file the routine produces 
os.remove('out.csv') 

print('Your data has been massaged.')
Example #23
def csv_to_xls(cf,nf):
  merge_all_to_a_book(glob.glob(cf), nf)
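Because the first argument goes through glob.glob, this helper accepts wildcard patterns as well as single file names (the paths below are hypothetical):

csv_to_xls("report.csv", "report.xlsx")    # a single file becomes one sheet
csv_to_xls("logs/*.csv", "all_logs.xlsx")  # every CSV in a folder, one sheet each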
Example #24
        path: location of the stored csv files and place where xlsx will be saved

        returns: name of the created xlsx 
    """
    
    from pyexcel.cookbook import merge_all_to_a_book
    import pyexcel.ext.xlsx     # needed to support xlsx format on pyexcel < 0.2.2 only

    zones = ['boston', 'miami', 'ftworth', 'chicago', 'saltlakecity', 'sanfrancisco'] #names for zones
    fileID = path #path for weather
    csvHeaders = [] #Lists to store files names

    for zone in zones:
        # path of the CSV data file for each zone
        csvHeaders.append(fileID + zone + str(day) + str(month) + str(now.year) + '.csv')

    merge_all_to_a_book(csvHeaders, 'weather' + str(day) + str(month) + '.xlsx')

    return 'weather'+str(day) + str(month)+'.xlsx'


def weatherMap(day, month,year):
    """
    Function: weatherMap
    ------------------------
    creates a networkx graph of all weather points given from the AWS data files and
    makes them into nodes on the graph, parsed for altitude, speed, direction and temperature

    day: day the data was collected
    month: month the data was collected
    year: year the data was collected
Example #25
        spam_results[i]['autonomous_system_number'] = all_whois[
            spam_results[i]['ip_address']][1]

for i in range(len(malware_results)):

    if ('ip_address' in malware_results[i]
            and malware_results[i]['ip_address'] in all_whois):

        malware_results[i]['autonomous_system_name'] = all_whois[
            malware_results[i]['ip_address']][0]
        malware_results[i]['autonomous_system_number'] = all_whois[
            malware_results[i]['ip_address']][1]

dictToCSV(malware_results, "malware")

#dictToCSV(spam_results, "spam")

merge_all_to_a_book(["malware.csv", "spam.csv"], "output.xlsx")
'''
for csvfile in ["malware.csv", "spam.csv"]:
    wb = xlwt.Workbook()
    ws = wb.add_sheet(csvfile.split('.')[0])
    with open(csvfile, 'rb') as f:
            reader = csv.reader(f)
            for r, row in enumerate(reader):
                for c, col in enumerate(row):
                    ws.write(r, c, col)
    wb.save('output.xls')

'''
Example #26
# This script builds an .xls or .xlsx file from one or more
# .csv files in the current folder.

from pyexcel.cookbook import merge_all_to_a_book
import shutil, os, random, string, glob

# collect the list of .csv files in the directory
file_names = glob.glob("*.csv")
# generate a random directory name for temporary files and create it
name_of_tempdir = ''.join(random.choice(string.ascii_lowercase) for i in range(7))
os.mkdir(name_of_tempdir)
# the maximum sheet-name length for xls is 31 characters, so truncate the file names
sheet_names = [name_of_tempdir + '/' + x[:27] + '.csv' if len(x) > 27
               else name_of_tempdir + '/' + x + '.csv'
               for x in [x[:-4] for x in file_names]]
# copy the files under their shortened names into the temporary directory
for file_name, sheet_name in zip(file_names, sheet_names):
    shutil.copy(file_name, sheet_name)
# convert the files from the temporary directory into workbook sheets and save;
# the output format (.xls or .xlsx) is determined by the file-name extension
merge_all_to_a_book(sheet_names, "output.xls")
# remove the temporary files
for f in sheet_names: os.remove(f)
os.rmdir(name_of_tempdir)
Example #27
# export the MongoDB documents as a JSON file
docs.to_json("./datas/Retail.json")

# have Pandas return a JSON string of the documents
json_export = docs.to_json()  # return JSON data
# print ("\nJSON data:", json_export)

# export MongoDB documents to a CSV file
docs.to_csv("./datas/Retail.csv", ",")  # CSV delimited by commas

# export MongoDB documents to CSV
csv_export = docs.to_csv(sep=",")  # CSV delimited by commas
print("\nCSV data:", csv_export)

# create IO HTML string
import io

html_str = io.StringIO()

# export as HTML
docs.to_html(buf=html_str, classes='table table-striped')

# print out the HTML table
print(html_str.getvalue())

# save the MongoDB documents as an HTML table
docs.to_html("./datas/Retail.html")
merge_all_to_a_book(glob.glob("./datas/Retail.csv"), "./datas/Retail.xls")

print("\n\ntime elapsed:", time.time() - start_time)
Example #28
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 14 14:11:06 2019

@author: Luis Rodriguez
"""

from pyexcel.cookbook import merge_all_to_a_book
import pandas as pd
# import pyexcel.ext.xlsx # no longer required if you use pyexcel >= 0.2.2
import glob

df = pd.read_csv('Passwords.csv', encoding='ISO 8859-1')

df.to_csv('Passwords1.csv', index=False)

print(df)

merge_all_to_a_book(glob.glob("Passwords1.csv"), "output.xlsx")
Example #29
                    device_page_link = driver.find_element_by_link_text(
                        'Devices')
                    device_page_link.click()
                    print('Go to devices page')
                    time.sleep(load_wait_time)
                    break
                else:
                    print(str(i), ':', 'retry', '[#', retry, ']',
                          'Error: cannot find download Excel button')
                    time.sleep(load_wait_time)

        print("Download all files in /tmp directory")
        #device_page_link = driver.find_element_by_link_text('Devices')
        #device_page_link.click()
        #time.sleep(load_wait_time)

    # XXX
    #device_list_elements = get_device_table_elements(driver) # refresh device list

    print("excel file converted")
    copyfile(download_dir + "xls_to_csv.csv", download_dir + "x_to_c.csv")
    merge_all_to_a_book(glob.glob(download_dir + "x_to_c.csv"),
                        upload_dir + "telecon.xlsx")
    print("copy telecon to reports all completed")

except Exception as err:
    raise err

else:
    driver.close()
Example #30
    print("- Downloading the papers' documents")
    trabalhos = pmap(download, trabalhos)
    print('- Converting documents from PDF to text')
    trabalhos = pmap(doc2txt, trabalhos)
    print('- Creating fields')
    referencias = flat(pmap(obter_campos, trabalhos))
    print('- Saving reference data to CSV (referencias.csv)')
    cria_csv(referencias, 'referencias.csv')
else:
    print(
        '2 Previous scrape found (to redo it, delete the file referencias.csv)'
    )
    referencias = le_csv('referencias.csv')

import unidecode
from pyexcel.cookbook import merge_all_to_a_book
import glob

grupos = list(set([i['GTR'] for i in referencias]))
for grupo in grupos:
    ref_grupo = [i for i in referencias if i['GTR'] == grupo]
    # transliterate accented characters first, then strip punctuation and spaces
    novo_nome = unidecode.unidecode(grupo)
    novo_nome = novo_nome.replace(' ', '_').replace(',', '').replace(
        ':', '').replace('(', '').replace(')', '')
    cria_csv(ref_grupo, './csv/%s.csv' % novo_nome)
    merge_all_to_a_book(glob.glob("./csv/%s.csv" % novo_nome),
                        "./xlsx/%s.xlsx" % novo_nome)

print('4 Done')
Example #31
            if len(contents) > len(headers):
                headers.append(splits[0].strip())

    # Write the header to the file. This runs only once.
    if not outfile_has_header:
        header = ", ".join(headers)
        temp_file.write(header)
        outfile_has_header = True

    # Write the row contents into the result file.
    new_line = ", ".join(contents)
    temp_file.write("\n" + new_line)

    # Close the file we read from.
    file.close()

# Close the temporary result file.
temp_file.close()

# Convert the temporary result file to Excel.
PC.merge_all_to_a_book([temp_file_name], outfile_name)

# Delete the temporary result file.
os.remove(temp_file_name)

# Print a message that the job is done.
print("Process Done.")

# Print how many seconds the job took.
end_time = time.time()
print("The Job Took " + str(end_time - start_time) + " seconds.")