import os
import warnings
from datetime import datetime, timedelta

import pandas as pd
from urllib2 import urlopen as u


def sec_based(a, b, c=3600):
    """Fetch intraday OHLCV bars for ticker `a` over `b` days at an
    interval of `c` seconds (hourly by default) from Google Finance."""
    path = r'C:\StockData\Hourly'
    if not os.path.exists(path):
        os.makedirs(path)
    warnings.filterwarnings("ignore")
    url = "https://www.google.com/finance/getprices?i=%d&p=%dd&f=d,o,h,l,v,c&df=cpct&q=%s" % (c, b, a)
    csv = u(url).read()
    temp = open('Stocktemp.csv', 'wb')
    temp.write(csv)
    temp.close()
    temp = open('Stocktemp.csv', 'r')
    lines = temp.readlines()
    temp.close()
    # Rebuild the file from the raw response: line 4 is "COLUMNS=..."
    # (strip the 8-character prefix) and line 7 is the first data row,
    # whose DATE field is a Unix timestamp prefixed with an 'a' marker.
    temp = open('Stocktemp.csv', 'w')
    temp.write(lines[4][8:])
    temp.write(lines[7][1:])
    for i in lines[8:]:
        temp.write(i)
    temp.close()
    d1 = pd.read_csv('Stocktemp.csv')
    os.remove('Stocktemp.csv')
    # After the first row the DATE column holds bar offsets; treat the
    # bars as contiguous and advance each timestamp by one interval.
    for i in range(1, d1.shape[0]):
        d1['DATE'][i] = d1['DATE'][i - 1] + c
    for i in range(d1.shape[0]):
        d1['DATE'][i] = datetime.utcfromtimestamp(
            float(d1['DATE'][i])).strftime('%Y-%m-%d %H:%M:%S')
    d1.to_csv(os.path.join(path, '%s.csv' % a), index=False)
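# Example calls (hypothetical ticker; Google has since retired the
# getprices endpoint, so these are illustrative):
#     sec_based('GOOG', 10)         # ten days of hourly bars
#     sec_based('GOOG', 10, c=300)  # the same range at 5-minute bars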
def daily(a, b, c, d, e, f, g):
    """Fetch daily OHLCV history for ticker `a` from the legacy Yahoo
    Finance table.csv endpoint. (b, c, d) are the start day/month/year
    and (e, f, g) the end day/month/year; Yahoo counts months from
    zero, hence the -1 adjustments below."""
    path = r'C:\StockData\Daily'
    if not os.path.exists(path):
        os.makedirs(path)
    x = c - 1
    y = f - 1
    url = "http://real-chart.finance.yahoo.com/table.csv?s=%s&a=%d&b=%d&c=%d&d=%d&e=%d&f=%d&g=d&ignore=.csv" % (a, x, b, d, y, e, g)
    csv = u(url).read()
    temp = open(os.path.join(path, '%s.csv' % a), 'wb+')
    temp.write(csv)
    temp.close()
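# Example (hypothetical ticker; Yahoo later retired this endpoint, so the
# call is illustrative): fetch 1 Jan 2015 through 31 Dec 2015 with
#     daily('GOOG', 1, 1, 2015, 31, 12, 2015)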
def bse():
    """Download the latest BSE equity bhavcopy (a zipped daily quotes
    file) and save its security code/name columns as a stock list."""
    import time
    import zipfile
    path = r'C:\StockData\StockList'
    if not os.path.exists(path):
        os.makedirs(path)
    x = datetime.fromtimestamp(time.time())
    # Reach back past non-trading days: four days early in the week,
    # two days otherwise. Bhavcopy files are named EQddmmyy.
    if x.weekday() < 2:
        temp = (x - timedelta(days=4)).strftime('%d%m%y')
    else:
        temp = (x - timedelta(days=2)).strftime('%d%m%y')
    url = "http://www.bseindia.com/download/BhavCopy/Equity/EQ%s_CSV.ZIP" % temp
    csv = u(url).read()
    temp1 = open('temp.zip', 'wb')
    temp1.write(csv)
    temp1.close()
    temp1 = zipfile.ZipFile('temp.zip')
    data = temp1.read('EQ%s.CSV' % temp)
    temp1.close()
    temp2 = open('Stocktemp.csv', 'wb')
    temp2.write(data)
    temp2.close()
    df1 = pd.read_csv('Stocktemp.csv', usecols=('SC_CODE', 'SC_NAME'))
    df1.to_csv(os.path.join(path, 'BSElist.csv'), index=False)
    os.remove('Stocktemp.csv')
    os.remove('temp.zip')
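# A minimal driver, assuming the three downloaders above share one module
# and C:\StockData is writable: refresh the stock list, then peek at it.
if __name__ == '__main__':
    bse()
    stocks = pd.read_csv(r'C:\StockData\StockList\BSElist.csv')
    print stocks.head()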
# Crawl the Tutorialspoint tutorials library and download each tutorial's
# PDF into a directory named after its menu section.
import os
from urllib2 import urlopen
from urllib import urlretrieve as u
from bs4 import BeautifulSoup

r = urlopen("http://tutorialspoint.com/tutorialslibrary.htm").read()
b = BeautifulSoup(r, 'html.parser')
ull = b.findAll('ul', {'class': 'menu'})
for ul in ull:
    cd = ul['id']
    if not os.path.exists(cd):
        os.mkdir(cd)
    print "Creating Directory - %s" % cd
    os.chdir(cd)
    print "Working Directory - %s" % cd
    links = ul.findAll('a')
    for link in links:
        try:
            sub = link['href'].split("/")[1]
            fname = sub + "_tutorial.pdf"
            flink = "http://tutorialspoint.com/" + sub + "/" + fname
            print "Processing Link - %s" % flink
            u(flink, fname)  # urlretrieve saves the PDF to disk
        except Exception:
            print "[!!] err - No PDF for " + str(link['href'])
            continue
    os.chdir("..")
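# Design note: the crawler chdir()s into each section directory and back
# out, so a failure mid-loop can strand the process in the wrong directory.
# A sketch of the alternative is to keep the working directory fixed and
# hand urlretrieve an explicit destination instead:
#     u(flink, os.path.join(cd, fname))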
"""Extracting Data from XML. In this assignment you will write a Python program somewhat similar to http://www.pythonlearn.com/code/geoxml.py. """ from urllib import urlopen as u from xml.etree.ElementTree import fromstring as fs default = 'http://python-data.dr-chuck.net/comments_297292.xml' url = raw_input('Enter location: ') or default data = u(url).read() print 'Retrieving', url, "\nRetrieved", len(data), 'characters' counts = fs(data).findall('.//count') kounts = list() for count in counts: kounts.append(int(count.text)) Sum = reduce(lambda c, s: c + s, kounts) print "Count: ", len(counts), "\nSum: "