def handle(self, *args, **options):
        """Export ward councillor contact details for the given cities as CSV.

        Reads two entries from ``options``:

        * ``file`` -- path of the CSV file to write (truncated if it exists).
        * ``city`` -- comma-separated list of city names.

        One row is written per ward: city name, ward name, then the
        councillor's first name, last name and email. Each city name is
        echoed to stdout before it is looked up.

        Raises:
            CommandError: if ``file`` or ``city`` is missing or empty.

        Whatever ``City.objects.get`` raises for an unknown city name is not
        handled here and propagates to the caller.
        """
        # Test the value, not just key presence: an option that was declared
        # but not supplied may still be in the dict with a None value
        # (TODO confirm how this command declares its options), which would
        # otherwise only fail later inside open() / .split().
        output_path = options.get('file')
        if not output_path:
            raise CommandError("An output filename must be specified with -f=")
        city_names = options.get('city')
        if not city_names:
            raise CommandError("At least one city must be specified with -c=")

        # The context manager flushes and closes the file even when a lookup
        # below raises part-way through the export.
        with open(output_path, 'w') as output_file:
            writer = UnicodeWriter(output_file)

            for city_name in city_names.split(','):
                print(city_name)
                city = City.objects.get(name=city_name)
                for ward in Ward.objects.filter(city=city):
                    councillor = ward.councillor
                    writer.writerow([
                        city.name, ward.name, councillor.first_name,
                        councillor.last_name, councillor.email,
                    ])
Example #2
0
def csv_out(imdb, file):
    """Write a collection of dict records to ``file`` as CSV.

    The header row is the union of the keys of all records, in the order in
    which each key is first encountered, so the column layout is the same
    on every run. A record that lacks a key, or holds ``None`` for it, gets
    an empty cell in that column.

    Args:
        imdb: records to export; each must provide ``keys()`` and ``get()``.
            The collection is iterated twice, so it must be re-iterable
            (for example a list, not a generator).
        file: path of the output file; it is overwritten if it exists.
    """
    print('\nWriting to %s' % (file,))
    with codecs.open(file, 'wb') as outfile:
        writer = UnicodeWriter(outfile)

        # Collect column names in first-seen order. The set is only used for
        # fast membership tests; the list fixes the column order.
        columns = []
        seen = set()
        for record in imdb:
            for key in record.keys():
                if key not in seen:
                    seen.add(key)
                    columns.append(key)
        writer.writerow(columns)

        for record in imdb:
            row = []
            for column in columns:
                value = record.get(column)
                row.append("" if value is None else value)
            writer.writerow(row)
Example #3
0
    def handle(self, *args, **options):
        """Write one CSV row per ward for every requested city.

        Options read from ``options``:

        * ``file`` -- output CSV path; an existing file is overwritten.
        * ``city`` -- comma-separated city names.

        Each row holds the city name, the ward name and the ward
        councillor's first name, last name and email address. Every city
        name is printed to stdout before its wards are exported.

        Raises:
            CommandError: if ``file`` or ``city`` is missing or empty.

        An unknown city name is not handled here: whatever
        ``City.objects.get`` raises propagates to the caller.
        """
        # Check for a usable value rather than for the key alone; a key that
        # is present but None (TODO confirm whether the option parser does
        # this for omitted options) would otherwise crash in open()/.split()
        # with a less helpful error.
        output_path = options.get('file')
        if not output_path:
            raise CommandError("An output filename must be specified with -f=")
        city_names = options.get('city')
        if not city_names:
            raise CommandError("At least one city must be specified with -c=")

        # "with" guarantees the file is flushed and closed, including when
        # a database lookup raises in the middle of the export.
        with open(output_path, 'w') as output_file:
            writer = UnicodeWriter(output_file)

            for city_name in city_names.split(','):
                print(city_name)
                city = City.objects.get(name=city_name)
                for ward in Ward.objects.filter(city=city):
                    councillor = ward.councillor
                    writer.writerow([
                        city.name, ward.name, councillor.first_name,
                        councillor.last_name, councillor.email,
                    ])
Example #4
0
    def export_db(self, **kwargs):
        """Dump selected columns of ``GPPT_Submissions`` to a CSV file.

        Only ``format="csv"`` is handled; any other value makes the call a
        no-op. The output is written to ``GPPT_Submissions.csv`` in the
        current working directory and an info message is logged afterwards.

        The ``Attachment_Binary``, ``Message_Id`` and ``Attachment_Id``
        columns are not part of the SELECT and therefore are not exported.

        Raises:
            KeyError: if no ``format`` keyword argument is supplied.
        """
        if kwargs["format"] != "csv":
            return

        sql = '''SELECT Id, Filename, Submitter, Region, Date, Lead_Office, P_Margin, Tot_Fee, Blended_Rate, 
            Tot_Hours, Hours_Mgr, Hours_SPM, Hours_PM, Hours_Cons, Hours_Assoc, Method, Tool_Version 
            FROM GPPT_Submissions'''

        cursor = self.cur.execute(sql)
        # Take the header from the result set itself so it always lines up
        # with the data columns. Deriving it separately from the table
        # schema would silently misalign the CSV as soon as the table gains
        # or reorders a column that the SELECT above does not mirror.
        colnames = [column[0] for column in cursor.description]
        results = cursor.fetchall()

        with open("GPPT_Submissions.csv", "w") as f:
            writer = UnicodeWriter(f)
            writer.writerow(colnames)
            for row in results:
                writer.writerow(row)

        logging.info("Database dumped to GPPT_Submissions.csv")
Example #5
0
def csv_out(imdb, file):
    """Export dict records to the CSV file at ``file``.

    The first row lists every key that occurs in any record, ordered by
    first appearance, which keeps the column layout stable between runs.
    Each following row holds one record; a key the record does not have
    (or maps to ``None``) becomes an empty cell.

    Args:
        imdb: re-iterable collection of records offering ``keys()`` and
            ``get()``; it is walked once for the header and once for the
            data rows.
        file: output path; an existing file is overwritten.
    """
    print('\nWriting to %s' % (file,))
    with codecs.open(file, 'wb') as outfile:
        writer = UnicodeWriter(outfile)

        # Ordered de-duplication: the list carries the column order, the
        # set only answers "already seen?" quickly.
        columns = []
        known = set()
        for record in imdb:
            for key in record.keys():
                if key not in known:
                    known.add(key)
                    columns.append(key)
        writer.writerow(columns)

        for record in imdb:
            cells = []
            for column in columns:
                value = record.get(column)
                cells.append("" if value is None else value)
            writer.writerow(cells)
Example #6
0
    def handle(self):
        """
        Reply to incoming requests.

        Request:
            <COMMAND> <FILTER-TYPE> [SUBSTRING ...]
            COMMAND:     ALL|OPEN|INGAME
            FILTER-TYPE: NONE|MOD|HOST|DESC
            SUBSTRING:   The text to look for in the column FILTER-TYPE. If
                         space[s] is encountered, each word must be in the
                         field (AND). If '|' is encountered, word[s] before
                         and after it will be searched for separately and
                         all results will be returned (OR).
        Reply:
            1st line: 'START <ISO 8601 timestamp, UTC>'
            2nd: List of hosts as an UTF-8 encoded CSV using ; as separator and
               quoting every field. The list will be filtered if
               FILTER-TYPE != NONE.
            3rd: 'END <length of list>'

        Notes:
            * MOD matches against ``host.gameName``, HOST against
              ``host.founder``. DESC is listed above but has no
              implementation in this method; it is logged as an unknown
              FILTER-TYPE.
            * A host that matches several '|' alternatives is listed once.
            * Malformed or unknown requests are logged and skipped without
              any reply being written.
            * The method returns when the input ends, the server is
              shutting down, the connection exceeded MAX_CONNECTION_LENGTH
              seconds, or a socket error occurs.
        """
        # COMMAND -> name of the attribute holding that host dictionary.
        host_sources = {"ALL": "hosts", "OPEN": "hosts_open",
                        "INGAME": "hosts_ingame"}
        # FILTER-TYPE -> host attribute the search words are matched against.
        filter_fields = {"MOD": "gameName", "HOST": "founder"}

        try:
            for line in self.rfile:
                # loop until disconnect or server shutdown
                if self.server.shutdown_now:
                    logger.info("(%s:%d) server shut down already, bye bye",
                                self.client_address[0], self.client_address[1])
                    self.finish()
                    return
                # remote sockets are not always closed, kill myself after MAX_CONNECTION_LENGTH seconds
                running_for = datetime.datetime.now() - self.thread.start_time
                if running_for > datetime.timedelta(seconds=MAX_CONNECTION_LENGTH):
                    logger.info(
                        "(%s:%d) Running since %s (>%d sec) in thread %s, killing myself.",
                        self.client_address[0], self.client_address[1],
                        self.thread.start_time.strftime("%Y-%m-%d %H:%M:%S"),
                        MAX_CONNECTION_LENGTH, self.thread.name)
                    self.finish()
                    return

                self.server.query_stats_add(line)
                line = line.split()
                # Anything but "<COMMAND> NONE" needs at least one search word.
                if len(line) < 2 or (len(line) == 2 and line[1] != "NONE"):
                    logger.error("(%s:%d) Format error: '%s'",
                                 self.client_address[0],
                                 self.client_address[1], line)
                    continue
                command, filter_type = line[0], line[1]

                # COMMAND
                if command not in host_sources:
                    logger.error("(%s:%d) Unknown COMMAND '%s'.",
                                 self.client_address[0],
                                 self.client_address[1], command)
                    continue
                # list() gives an indexable snapshot on every Python version.
                host_list = list(getattr(self, host_sources[command]).values())

                # FILTER-TYPE
                if filter_type == "NONE":
                    host_list_filtered = host_list
                elif filter_type in filter_fields:
                    field = filter_fields[filter_type]
                    alternatives = " ".join(line[2:]).split("|")
                    # One pass over the hosts with any() keeps a host that
                    # matches several alternatives from being listed (and
                    # counted in END) more than once.
                    host_list_filtered = [
                        host for host in host_list
                        if any(substr_search(words, getattr(host, field))
                               for words in alternatives)
                    ]
                else:
                    # Report the offending FILTER-TYPE, not the COMMAND.
                    logger.error("(%s:%d) Unknown FILTER-TYPE '%s'.",
                                 self.client_address[0],
                                 self.client_address[1], filter_type)
                    continue

                # Assemble the reply from byte strings only. The CSV buffer
                # holds encoded bytes; concatenating it onto a unicode
                # string makes Python 2 decode it as ASCII, which raises
                # UnicodeDecodeError for any non-ASCII host field.
                timestamp = datetime.datetime.utcnow().isoformat()
                parts = [(u"START %s\n" % timestamp).encode("utf-8")]
                if host_list_filtered:
                    csvfile = cStringIO.StringIO()
                    try:
                        csvwriter = UnicodeWriter(csvfile,
                                                  quoting=csv.QUOTE_ALL)
                        csvwriter.writerow(
                            host_list_filtered[0].as_list_header())
                        csvwriter.writerows(
                            [host.as_list() for host in host_list_filtered])
                        parts.append(csvfile.getvalue())
                    finally:
                        csvfile.close()
                parts.append(
                    (u"END %d\n" % len(host_list_filtered)).encode("utf-8"))
                self.wfile.write(b"".join(parts))
        except socket.error:
            # client disconnected. that's OK, thread will terminate now
            logger.debug(
                "(%s:%d) client disconnected after %0.1f min",
                self.client_address[0], self.client_address[1],
                (datetime.datetime.now() - self.thread.start_time).seconds /
                60.0)
            self.finish()
            return
        resp = session.get(url)
    logger.info('Parsing content')
    parsed_html = bs4.BeautifulSoup(resp.text, 'html.parser')
    # Note: There should be only one element called div.desc
    # but there's no guarantee
    pages_text = parsed_html.find('div', class_='desc').get_text()
    config['num_pages'] = int(re.search('Page 1 of ([0-9]+)', pages_text).group(1))
    print 'Found {0} pages'.format(config['num_pages'])
    if args.start > config['num_pages']:
        print 'Start page', args.start, 'is greater than found pages:', config['num_pages']
        print 'Setting start pages to last page'
        args.start = config['num_pages']
    imdb_all = []
    username = os.path.splitext(os.path.basename(args.outfile))[0]
    with codecs.open(args.outfile, 'wb') as outfile:
        w = UnicodeWriter(outfile)
        # Only output header if file didn't exist
        w.writerow(['position','const','created','modified','description','Title','Title type','Directors',
                    '{0} rated'.format(username),'IMDb Rating','Runtime (mins)','Year','Genres','Num. Votes',
                    'Release Date (month/day/year)','URL'])
    for page in get_start_positions(config['num_pages'], args.start):
        pool.spawn(download_page, page[0], page[1])
    pool.join()
    with codecs.open(args.outfile, 'ab') as outfile:
        w = UnicodeWriter(outfile)
        w.writerows(imdb_all)
    end_time = time.time()
    print 'Downloaded', len(imdb_all), 'ratings in', pretty_seconds(end_time - start_time)
    logger.info('Downloaded %s ratings in %s', len(imdb_all), pretty_seconds(end_time - start_time))
    print 'Saved results in', args.outfile
Example #8
0
    def handle(self):
        """
        Reply to incoming requests.

        Request:
            <COMMAND> <FILTER-TYPE> [SUBSTRING ...]
            COMMAND:     ALL|OPEN|INGAME
            FILTER-TYPE: NONE|MOD|HOST|DESC
            SUBSTRING:   The text to look for in the column FILTER-TYPE. If
                         space[s] is encountered, each word must be in the
                         field (AND). If '|' is encountered, word[s] before
                         and after it will be searched for separately and
                         all results will be returned (OR).
        Reply:
            1st line: 'START <ISO 8601 timestamp, UTC>'
            2nd: List of hosts as an UTF-8 encoded CSV using ; as separator and
               quoting every field. The list will be filtered if
               FILTER-TYPE != NONE.
            3rd: 'END <length of list>'

        Notes:
            * MOD searches ``host.gameName`` and HOST searches
              ``host.founder``. DESC appears in the protocol description
              but is not implemented in this method; such a request is
              logged as an unknown FILTER-TYPE.
            * Each matching host appears once in the reply, even when it
              matches more than one '|' alternative.
            * Requests that fail validation are logged and skipped; nothing
              is written back for them.
            * Processing stops when the input ends, the server is shutting
              down, the connection is older than MAX_CONNECTION_LENGTH
              seconds, or a socket error is raised.
        """
        # COMMAND -> attribute of self that holds the matching host dict.
        host_sources = {"ALL": "hosts", "OPEN": "hosts_open", "INGAME": "hosts_ingame"}
        # FILTER-TYPE -> attribute of a host that the words are searched in.
        filter_fields = {"MOD": "gameName", "HOST": "founder"}

        try:
            for line in self.rfile:
                # loop until disconnect or server shutdown
                if self.server.shutdown_now:
                    logger.info("(%s:%d) server shut down already, bye bye", self.client_address[0],
                                self.client_address[1])
                    self.finish()
                    return
                # remote sockets are not always closed, kill myself after MAX_CONNECTION_LENGTH seconds
                if datetime.datetime.now() - self.thread.start_time > datetime.timedelta(seconds=MAX_CONNECTION_LENGTH):
                    logger.info("(%s:%d) Running since %s (>%d sec) in thread %s, killing myself.",
                                self.client_address[0], self.client_address[1],
                                self.thread.start_time.strftime("%Y-%m-%d %H:%M:%S"), MAX_CONNECTION_LENGTH,
                                self.thread.name)
                    self.finish()
                    return

                self.server.query_stats_add(line)
                line = line.split()
                # Every FILTER-TYPE except NONE requires at least one search word.
                if len(line) < 2 or (len(line) == 2 and line[1] != "NONE"):
                    logger.error("(%s:%d) Format error: '%s'", self.client_address[0], self.client_address[1], line)
                    continue
                command, filter_type = line[0], line[1]

                # COMMAND
                if command not in host_sources:
                    logger.error("(%s:%d) Unknown COMMAND '%s'.", self.client_address[0], self.client_address[1],
                                 command)
                    continue
                # list() yields an indexable snapshot regardless of Python version.
                host_list = list(getattr(self, host_sources[command]).values())

                # FILTER-TYPE
                if filter_type == "NONE":
                    host_list_filtered = host_list
                elif filter_type in filter_fields:
                    field = filter_fields[filter_type]
                    alternatives = " ".join(line[2:]).split("|")
                    # Filtering the hosts once with any() avoids duplicate
                    # entries for a host that matches several alternatives.
                    host_list_filtered = [host for host in host_list
                                          if any(substr_search(words, getattr(host, field))
                                                 for words in alternatives)]
                else:
                    # Log the rejected FILTER-TYPE itself rather than the COMMAND.
                    logger.error("(%s:%d) Unknown FILTER-TYPE '%s'.", self.client_address[0], self.client_address[1],
                                 filter_type)
                    continue

                # Build the reply purely from byte strings: appending the
                # encoded CSV bytes to a unicode string triggers an implicit
                # ASCII decode in Python 2 and fails with UnicodeDecodeError
                # as soon as a host field contains a non-ASCII character.
                chunks = [(u"START %s\n" % datetime.datetime.utcnow().isoformat()).encode("utf-8")]
                if host_list_filtered:
                    csvfile = cStringIO.StringIO()
                    try:
                        csvwriter = UnicodeWriter(csvfile, quoting=csv.QUOTE_ALL)
                        csvwriter.writerow(host_list_filtered[0].as_list_header())
                        csvwriter.writerows([host.as_list() for host in host_list_filtered])
                        chunks.append(csvfile.getvalue())
                    finally:
                        csvfile.close()
                chunks.append((u"END %d\n" % len(host_list_filtered)).encode("utf-8"))
                self.wfile.write(b"".join(chunks))
        except socket.error:
            # client disconnected. that's OK, thread will terminate now
            logger.debug("(%s:%d) client disconnected after %0.1f min", self.client_address[0], self.client_address[1],
                (datetime.datetime.now() - self.thread.start_time).seconds/60.0)
            self.finish()
            return
from bs4 import BeautifulSoup
from unicodewriter import UnicodeWriter
from urllib import urlopen, urlencode
import csv
# Scrape LinkedIn job-search result pages for the keyword "Tableau" and save
# title, company, location and description of every listing to a CSV file.
PAGE_SIZE = 50    # listings requested per page; must match "count" in url
PAGE_COUNT = 25   # number of result pages to fetch

url = 'https://www.linkedin.com/jobs/search?keywords=Tableau&locationId=us:0&orig=JSERP&count=50&'
# Example of a complete request URL:
#https://www.linkedin.com/jobs/search?keywords=Tableau&locationId=us:0&orig=JSERP&start=0&count=50
data = {}

# "with" closes (and flushes) the CSV even if a request or a parse step
# raises, so rows collected up to that point are not lost.
with open('linkedin_job_companyname.csv', 'w') as outfile:
    out_wrtr = UnicodeWriter(outfile)
    out_wrtr.writerow(['title', 'company', 'location', 'description'])

    for i in range(PAGE_COUNT):
        offset = i * PAGE_SIZE
        data['start'] = offset
        data_url = url + urlencode(data)
        soup = BeautifulSoup(urlopen(data_url), 'lxml')
        for item in soup.findAll('li', 'job-listing'):
            title = item.find('span', 'job-title-text').text
            company = item.find('span', 'company-name-text').text
            # The location text sits in a span nested inside the
            # job-location span.
            location = item.find('span', 'job-location').find('span').text
            description = item.find('div', 'job-description').text
            # NOTE(review): this progress line is printed once per listing,
            # not once per page -- confirm that is intended.
            print('Running: ' + str(offset) + ' page.')
            print('#'.join([title, company, location, description]))
            print('\n')
            out_wrtr.writerow([title, company, location, description])