def handle(self, *args, **options):
        self._canvas = Canvas()
        self._tools = ExternalTools()
        self._accounts = Accounts()
        self._courses = Courses()
        self._options = options

        csv.register_dialect("unix_newline", lineterminator="\n")
        self._writer = csv.writer(sys.stdout, dialect="unix_newline")

        self._headers = [
            'tool_name', 'tool_id', 'tool_type', 'account_name', 'account_id'
        ]

        if self._options['courses']:
            self._headers.append('course_name')
            self._headers.append('course_id')
            self._headers.append('term')

        if options['sessionless']:
            self._headers.append('sessionless url')

        if re.match(r'^\d+$', options['account_id']):
            account = self._accounts.get_account(options['account_id'])
        else:
            account = self._accounts.get_account_by_sis_id(
                options['account_id'])

        try:
            self.report_external_tools(account)

        except DataFailureException as err:
            if err.status == 404:
                print('Unknown Sub-Account \"%s\"' % (options['account_id']),
                      file=sys.stderr)
Example No. 2
    def handle(self, *args, **options):
        self._canvas = Canvas()
        self._tools = ExternalTools()
        self._accounts = Accounts()
        self._courses = Courses()
        self._options = options

        csv.register_dialect("unix_newline", lineterminator="\n")
        self._writer = csv.writer(sys.stdout, dialect="unix_newline")

        self._headers = ['tool_name', 'tool_id', 'tool_type', 'account_name', 'account_id']

        if self._options['courses']:
            self._headers.append('course_name')
            self._headers.append('course_id')
            self._headers.append('term')

        if options['sessionless']:
            self._headers.append('sessionless url')

        accounter = self._accounts.get_account if re.match(r'^\d+$', options['account_id']) \
                        else self._accounts.get_account_by_sis_id
        try:
            self.report_external_tools(accounter(options['account_id']))

        except DataFailureException as err:
            if err.status == 404:
                print('Unknown Sub-Account "%s"' % options['account_id'],
                      file=sys.stderr)
Example No. 3
 def test_register_kwargs(self):
     name = 'fedcba'
     csv.register_dialect(name, delimiter=';')
     try:
         self.assertEqual(csv.get_dialect(name).delimiter, ';')
         self.assertEqual(list(csv.reader(['X;Y;Z'], name)), [['X', 'Y', 'Z']])
     finally:
         csv.unregister_dialect(name)
Example No. 4
 def open_csv(self, path, field_names=None):
     class SKV(csv.excel):
         # like excel, but uses semicolons
         delimiter = ";"
     csv.register_dialect("SKV", SKV)
     f = csv.DictReader(open(path, 'U'), encoding='utf-8', dialect='SKV',
                        fieldnames=field_names)
     return f
Example No. 5
 def test_register_kwargs(self):
     name = 'fedcba'
     csv.register_dialect(name, delimiter=';')
     try:
         self.assertNotEqual(csv.get_dialect(name).delimiter, '\t')
         self.assertEqual(list(csv.reader([b'X;Y;Z'], name)), [[u'X', u'Y', u'Z']])
     finally:
         csv.unregister_dialect(name)
Example No. 7
 def write_csv(self, filename='output.csv', make_strings=False):
     """Write the processed rows to the given filename
     """
     if (len(self.rows) <= 0):
         raise AttributeError('No rows were loaded')
     if make_strings:
         out = self.make_strings()
     else:
         out = self.rows
     with open(filename, 'wb+') as f:
         csv.register_dialect('vwo', delimiter='`')
         writer = csv.DictWriter(f, self.key_map.keys(), dialect='vwo')
         writer.writeheader()
         writer.writerows(out)
Example No. 8
 def test_registry(self):
     class myexceltsv(csv.excel):
         delimiter = "\t"
     name = "myexceltsv"
     expected_dialects = csv.list_dialects() + [name]
     expected_dialects.sort()
     csv.register_dialect(name, myexceltsv)
     try:
         self.assertEqual(csv.get_dialect(name).delimiter, '\t')
         got_dialects = csv.list_dialects()
         got_dialects.sort()
         self.assertEqual(expected_dialects, got_dialects)
     finally:
         csv.unregister_dialect(name)
Example No. 9
 def test_registry(self):
     class myexceltsv(csv.excel):
         delimiter = "\t"
     name = "myexceltsv"
     expected_dialects = csv.list_dialects() + [name]
     expected_dialects.sort()
     csv.register_dialect(name, myexceltsv)
     try:
         self.assertEqual(csv.get_dialect(name).delimiter, '\t')
         got_dialects = csv.list_dialects()
         got_dialects.sort()
         self.assertEqual(expected_dialects, got_dialects)
     finally:
         csv.unregister_dialect(name)
Example No. 10
def main():
    logger = logging.getLogger('main')

    logger.info('Starting Process')

    logger.info('Reading ini file')
    config = ConfigParser.RawConfigParser()
    config.read('config.ini')

    consumer_key = config.get('twitter credentials', 'consumer_key')
    consumer_secret = config.get('twitter credentials', 'consumer_secret')

    access_token = config.get('twitter credentials', 'access_token')

    access_token_secret = config.get('twitter credentials',
                                     'access_token_secret')

    savetocsv = config.getboolean('CSV', 'enabled')
    logger.info('Authenticating')
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)

    api = tweepy.API(auth, wait_on_rate_limit=True)

    user = api.get_user('eeftychiou')

    logger.info("Connected to Twitter Api: %s",
                user._api.last_response.status_code)

    logger.info("Connecting to Database")
    dbacc = dal.TweetDal()

    # start consumer and continue scraping
    logger.info("Starting Worker")
    TW_thread = threading.Thread(target=TWconsumer)
    TW_thread.start()

    #TODO load criteria from ini file
    searchTerms = 'refugee OR réfugié OR rifugiato OR flüchtling OR flykting OR ' \
                  'mülteci OR menekült OR refugees OR refugeeswelcome OR refugeecrisis OR ' \
                  'refugeesGR OR refugeeconvoy'
    searchTerms = 'refugee'  # note: overrides the full term list above

    dateFrom = "2015-08-29"
    dateToo = "2015-09-01"
    interval = 5  #days to sample each search
    maxTweetPerInterval = 250

    dtFrom = datetime.strptime(dateFrom, '%Y-%m-%d')
    dtToo = datetime.strptime(dateToo, '%Y-%m-%d')

    # set up the csv writer
    if savetocsv:
        csv.register_dialect('myDialect', delimiter=';', quoting=csv.QUOTE_ALL)
        fname = dateFrom + "_" + dateToo + "_dump.csv"
        outputFile = open(fname, "w+")
        myFields = [
            'username', 'date', 'retweets', 'favorites', 'replies', 'text',
            'geo', 'mentions', 'hashtags', 'id', 'permalink', 'conversationId',
            'userid'
        ]
        writer = csv.DictWriter(outputFile,
                                fieldnames=myFields,
                                dialect='myDialect')
        writer.writeheader()

    logger.info('*** Criteria *** ')
    logger.info('searchTerms[%s]', searchTerms)
    logger.info('dateFrom[%s] to:[%s] interval[%i] maxTweetPerInterval[%i]',
                dateFrom, dateToo, interval, maxTweetPerInterval)

    for dtItfr in daterange(dtFrom, dtToo, interval):
        dtItfrStr = dtItfr.strftime("%Y-%m-%d")
        dtItToo = dtItfr + timedelta(interval)
        dtIttooStr = dtItToo.strftime("%Y-%m-%d")
        logger.info('Starting export for from: %s to: %s  ', dtItfrStr,
                    dtIttooStr)

        tweetCriteria = got.manager.TweetCriteria().setQuerySearch(
            searchTerms).setSince(dtItfrStr).setUntil(dtIttooStr).setMaxTweets(
                maxTweetPerInterval)
        tweets = got.manager.TweetManager.getTweets(tweetCriteria)

        if savetocsv:
            for t in tweets:
                writer.writerow(t.data)
            logger.info(' Rows %d saved to file...\n' % len(tweets))

        tweetIDs = [
            x.data['id'] for x in tweets if not dbacc.tweetExists(x.data['id'])
        ]

        dbacc.add_jobs('tweet', tweetIDs)

    logger.info('Finished Processing')
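
For reference, a minimal standalone sketch of what the 'myDialect' settings above (semicolon delimiter, QUOTE_ALL) produce; the row values are invented.

import csv
import sys

# Same dialect parameters as in the scraper above; QUOTE_ALL wraps every
# field in quote characters, including numbers.
csv.register_dialect('myDialect', delimiter=';', quoting=csv.QUOTE_ALL)
writer = csv.writer(sys.stdout, dialect='myDialect')
writer.writerow(['alice', '2015-08-30', 3])   # -> "alice";"2015-08-30";"3"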
Example No. 11
    def test_dialect_apply(self):
        class testA(csv.excel):
            delimiter = "\t"

        class testB(csv.excel):
            delimiter = ":"

        class testC(csv.excel):
            delimiter = "|"

        csv.register_dialect('testC', testC)
        try:
            fd, name = tempfile.mkstemp()
            fileobj = os.fdopen(fd, "w+b")
            try:
                writer = csv.writer(fileobj)
                writer.writerow([1, 2, 3])
                fileobj.seek(0)
                self.assertEqual(fileobj.read(), "1,2,3\r\n")
            finally:
                fileobj.close()
                os.unlink(name)

            fd, name = tempfile.mkstemp()
            fileobj = os.fdopen(fd, "w+b")
            try:
                writer = csv.writer(fileobj, testA)
                writer.writerow([1, 2, 3])
                fileobj.seek(0)
                self.assertEqual(fileobj.read(), "1\t2\t3\r\n")
            finally:
                fileobj.close()
                os.unlink(name)

            fd, name = tempfile.mkstemp()
            fileobj = os.fdopen(fd, "w+b")
            try:
                writer = csv.writer(fileobj, dialect=testB())
                writer.writerow([1, 2, 3])
                fileobj.seek(0)
                self.assertEqual(fileobj.read(), "1:2:3\r\n")
            finally:
                fileobj.close()
                os.unlink(name)

            fd, name = tempfile.mkstemp()
            fileobj = os.fdopen(fd, "w+b")
            try:
                writer = csv.writer(fileobj, dialect='testC')
                writer.writerow([1, 2, 3])
                fileobj.seek(0)
                self.assertEqual(fileobj.read(), "1|2|3\r\n")
            finally:
                fileobj.close()
                os.unlink(name)

            fd, name = tempfile.mkstemp()
            fileobj = os.fdopen(fd, "w+b")
            try:
                writer = csv.writer(fileobj, dialect=testA, delimiter=';')
                writer.writerow([1, 2, 3])
                fileobj.seek(0)
                self.assertEqual(fileobj.read(), "1;2;3\r\n")
            finally:
                fileobj.close()
                os.unlink(name)

        finally:
            csv.unregister_dialect('testC')
Example No. 12
#!/usr/bin/env python2.7
# -*- encoding: utf-8 -*-

from __future__ import unicode_literals

import ast
import codecs
import collections
from datetime import datetime
import sys

import unicodecsv as csv

import graant_redmine

csv.register_dialect('hledger', delimiter=b',', quoting=csv.QUOTE_ALL)


def read_config(path):
    with codecs.open(path, 'r', 'utf-8') as f:
        return ast.literal_eval(f.read())


def split_account(a):
    return a.split(":")


LogEntry = collections.namedtuple(
    'LogEntry',
    ['date', 'is_uploaded', 'account', 'account_path', 'hours', 'comment'])
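
As a hypothetical illustration (not part of the original script), a LogEntry could be written with the registered 'hledger' dialect roughly as follows; the file name and field values are invented, while LogEntry, split_account, and the dialect name come from the snippet above.

# Hypothetical usage sketch: unicodecsv writes encoded bytes, so the file is
# opened in binary mode; the 'hledger' dialect quotes every field.
entry = LogEntry(date=datetime(2020, 1, 15), is_uploaded=False,
                 account='client:acme',
                 account_path=split_account('client:acme'),
                 hours=1.5, comment='weekly sync')

with open('timelog.csv', 'wb') as f:
    writer = csv.writer(f, dialect='hledger')
    writer.writerow([entry.date.strftime('%Y-%m-%d'), entry.account,
                     entry.hours, entry.comment])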
Example No. 13
def regenerate_course_data_in_csv_format(course):
    """
    Get all data the ORA has for a course in CSV format, and upload it to S3.
    course - A course id string.
    """
    # Set up an output for our csv file.
    tempfile_write = TemporaryFile()

    # Get all locations in the course.
    locations = [l['location'] for l in Submission.objects.filter(course_id=course).values('location').distinct()]

    # Set up our csv writer.
    csv.register_dialect('ora', delimiter=',', quoting=csv.QUOTE_MINIMAL, doublequote=True)

    keys = None

    # Loop through all of the locations in the course to generate data.
    for (i, location) in enumerate(locations):
        subs = Submission.objects.filter(location=location)

        for sub in subs:
            values = []

            # Get all the scores and feedback for each submission.
            grader_info = sub.get_all_successful_scores_and_feedback()
            submission_text = sub_commas(encode_ascii(sub.student_response))

            # Some submissions have multiple graders, in which case score is a list.
            # Handle these cases by breaking them down into separate rows.
            if isinstance(grader_info['score'], list):
                for j in xrange(0, len(grader_info['score'])):
                    new_grader_info = {'submission_text': submission_text}
                    # Any key that is a list should be broken down, any other key should
                    # be passed into the row like normal.
                    for key in grader_info:
                        if isinstance(grader_info[key], list):
                            new_grader_info.update({key: grader_info[key][j]})
                        else:
                            new_grader_info.update({key: grader_info[key]})
                    values.append(new_grader_info)
            else:
                grader_info['submission_text'] = submission_text
                values.append(grader_info)

            for val in values:
                val['feedback'] = sub_commas(encode_ascii(val['feedback']))

            # Set up the header keys, csv writer, and header row.
            if keys is None:
                keys = [k for k in values[0]]
                writer = csv.DictWriter(tempfile_write, keys, dialect='ora')
                writer.writeheader()

            # Write the rows to csv.
            for v in values:
                writer.writerow(v)

    # Go back to the beginning of the string.
    tempfile_write.seek(0)
    filename = get_course_data_filename(course)

    # If we have an S3 account setup, upload, otherwise write to a local file.
    if settings.AWS_ACCESS_KEY_ID != "":
        # Upload the csv file to S3 and close the StringIO object.
        conn = S3Connection(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)
        bucket = conn.create_bucket(settings.S3_BUCKETNAME.lower())
        k = Key(bucket)
        k.key = filename
        k.set_contents_from_file(tempfile_write)
        tempfile_write.close()
    else:
        with open(os.path.abspath(os.path.join(settings.COURSE_DATA_PATH, filename)), "w") as f:
            f.write(tempfile_write.read())
Example No. 14
    argparser.add_argument('search_query', help=search_query_help)

    args = argparser.parse_args()

    #place = args.place

    search_query = args.search_query
    place = "Miami"
    #search_query = "restaurants"
    '''Scraping data'''
    scraped_data = parse(search_query)

    print("Writing data to output file")

    csv.register_dialect('sc', delimiter=';')
    '''ab concatenate'''
    mode = "wb"

    with open("scraped_yelp_results_for_" + search_query + "_%s.csv" % (place),
              mode) as fp:

        fieldnames = [
            'business_name', 'rank', 'review_count', 'categories', 'rating',
            'address', 'longitude', 'latitude', 'price_range', 'url'
        ]

        writer = csv.DictWriter(fp,
                                fieldnames=fieldnames,
                                dialect='sc',
                                extrasaction='ignore',
Example No. 15
    def _write_local_data_files(self, cursor):
        """
        Takes a cursor, and writes results to a local file.

        :return: A dictionary where keys are filenames to be used as object
            names in GCS, and values are file handles to local files that
            contain the data for the GCS objects.
        """
        schema = list(
            map(lambda schema_tuple: schema_tuple[0], cursor.description))
        file_no = 0
        tmp_file_handle = NamedTemporaryFile(delete=True)
        tmp_file_handles = {self.filename.format(file_no): tmp_file_handle}

        # Save file header for csv if required
        if (self.export_format['file_format'] == 'csv'):

            # Deal with CSV formatting. Try to use dialect if passed
            if ('csv_dialect' in self.export_format):
                # Use dialect name from params
                dialect_name = self.export_format['csv_dialect']
            else:
                # Create internal dialect based on parameters passed
                dialect_name = 'mysql_to_gcs'
                csv.register_dialect(
                    dialect_name,
                    delimiter=self.export_format.get('csv_delimiter') or ',',
                    doublequote=self.export_format.get('csv_doublequote')
                    or 'True',
                    escapechar=self.export_format.get('csv_escapechar')
                    or None,
                    lineterminator=self.export_format.get('csv_lineterminator')
                    or '\r\n',
                    quotechar=self.export_format.get('csv_quotechar') or '"',
                    quoting=eval(
                        self.export_format.get('csv_quoting')
                        or 'csv.QUOTE_MINIMAL'))
            # Create CSV writer using either provided or generated dialect
            csv_writer = csv.writer(tmp_file_handle,
                                    encoding='utf-8',
                                    dialect=dialect_name)

            # Include column header in first row
            if ('csv_columnheader' in self.export_format
                    and eval(self.export_format['csv_columnheader'])):
                csv_writer.writerow(schema)

        for row in cursor:
            # Convert datetimes and longs to BigQuery safe types
            row = map(self.convert_types, row)

            # Save rows as CSV
            if (self.export_format['file_format'] == 'csv'):
                csv_writer.writerow(row)
            # Save rows as JSON
            else:
                # Convert datetime objects to utc seconds, and decimals to floats
                row_dict = dict(zip(schema, row))

                # TODO validate that row isn't > 2MB. BQ enforces a hard row size of 2MB.
                s = json.dumps(row_dict, sort_keys=True)
                if PY3:
                    s = s.encode('utf-8')
                tmp_file_handle.write(s)

                # Append newline to make dumps BigQuery compatible.
                tmp_file_handle.write(b'\n')

            # Stop if the file exceeds the file size limit.
            if tmp_file_handle.tell() >= self.approx_max_file_size_bytes:
                file_no += 1
                tmp_file_handle = NamedTemporaryFile(delete=True)
                tmp_file_handles[self.filename.format(
                    file_no)] = tmp_file_handle

                # For CSV files, we need to create a new writer with the new handle
                # and write header in first row
                if (self.export_format['file_format'] == 'csv'):
                    csv_writer = csv.writer(tmp_file_handle,
                                            encoding='utf-8',
                                            dialect=dialect_name)
                    if ('csv_columnheader' in self.export_format
                            and eval(self.export_format['csv_columnheader'])):
                        csv_writer.writerow(schema)

        return tmp_file_handles
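
For orientation, a hypothetical export_format mapping that this method might receive; only keys referenced above are used, and the values are illustrative rather than defaults.

# Hypothetical export_format passed to the operator. Omitting 'csv_dialect'
# makes the method register its own 'mysql_to_gcs' dialect from these keys.
export_format = {
    'file_format': 'csv',
    'csv_delimiter': '|',
    'csv_doublequote': 'True',
    'csv_escapechar': None,
    'csv_lineterminator': '\r\n',
    'csv_quotechar': '"',
    'csv_quoting': 'csv.QUOTE_ALL',
    'csv_columnheader': 'True',
}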
Example No. 16
import_dir= sys.argv[1] + "/"
user_list = import_dir + "all-students.txt"
students  = 1
if not os.path.exists( user_list ):
    user_list=import_dir + "all-user.txt"
    students = 0
if not os.path.exists( import_dir + "/passwordfiles" ):
  os.mkdir( import_dir + "passwordfiles", 0770 );

all_classes = []
with open(user_list) as csvfile:
    #Detect the type of the csv file
    dialect = unicodecsv.Sniffer().sniff(csvfile.read(1024))
    csvfile.seek(0)
    #Create an array of dicts from it
    unicodecsv.register_dialect('oss',dialect)
    reader = unicodecsv.DictReader(csvfile,dialect='oss')
    for row in reader:
        fobj = open("/usr/share/oss/templates/password.html","r")
        template = fobj.read().decode('utf8')
        fobj.close()
        uid=""
        group=""
        for field in reader.fieldnames:
            template = template.replace(field,escape(row[field]))
            if field == "UID" or field == "BENUTZERNAME" or field == "LOGIN":
                uid=row[field]
            if students == 1 and ( field == "CLASS" or field == "KLASSE" ):
                group=row[field]
                if group not in all_classes:
                    all_classes.append(group)
Example No. 17
    'Sponsor Agency',
    'Drinking Water Supply',
    'Water Quality Improvement',
    'Water Reuse/Recycling',
    'Stormwater Improvements',
    'Groundwater Benefits',
    'Infiltration',
    'Habitat Protection and Restoration',
    'Flood Protection'
]

csv.register_dialect(
    'dialect',
    delimiter=',',
    quotechar='"',
    doublequote=False,
    skipinitialspace=True,
    lineterminator='\r\n',
    escapechar='\\',
    quoting=csv.QUOTE_ALL
)

'''
Routines to standardize reading from
and writing to this db
These functions return a list
with ls[0] as the reader/writer and
ls[1] as the open file handle that
one can close by calling closeDB(ls[1])
'''
def createReader(dbname):
    dbname = io.open(dbname, kDefautlReadingFmt)
Example No. 18
    def test_dialect_apply(self):
        class testA(csv.excel):
            delimiter = "\t"
        class testB(csv.excel):
            delimiter = ":"
        class testC(csv.excel):
            delimiter = "|"

        csv.register_dialect('testC', testC)
        try:
            fd, name = tempfile.mkstemp()
            fileobj = os.fdopen(fd, "w+b")
            try:
                writer = csv.writer(fileobj)
                writer.writerow([1,2,3])
                fileobj.seek(0)
                self.assertEqual(fileobj.read(), "1,2,3\r\n")
            finally:
                fileobj.close()
                os.unlink(name)

            fd, name = tempfile.mkstemp()
            fileobj = os.fdopen(fd, "w+b")
            try:
                writer = csv.writer(fileobj, testA)
                writer.writerow([1,2,3])
                fileobj.seek(0)
                self.assertEqual(fileobj.read(), "1\t2\t3\r\n")
            finally:
                fileobj.close()
                os.unlink(name)

            fd, name = tempfile.mkstemp()
            fileobj = os.fdopen(fd, "w+b")
            try:
                writer = csv.writer(fileobj, dialect=testB())
                writer.writerow([1,2,3])
                fileobj.seek(0)
                self.assertEqual(fileobj.read(), "1:2:3\r\n")
            finally:
                fileobj.close()
                os.unlink(name)

            fd, name = tempfile.mkstemp()
            fileobj = os.fdopen(fd, "w+b")
            try:
                writer = csv.writer(fileobj, dialect='testC')
                writer.writerow([1,2,3])
                fileobj.seek(0)
                self.assertEqual(fileobj.read(), "1|2|3\r\n")
            finally:
                fileobj.close()
                os.unlink(name)

            fd, name = tempfile.mkstemp()
            fileobj = os.fdopen(fd, "w+b")
            try:
                writer = csv.writer(fileobj, dialect=testA, delimiter=';')
                writer.writerow([1,2,3])
                fileobj.seek(0)
                self.assertEqual(fileobj.read(), "1;2;3\r\n")
            finally:
                fileobj.close()
                os.unlink(name)

        finally:
            csv.unregister_dialect('testC')
Example No. 19
import sys

import unicodecsv as csv
from unicodecsv import register_dialect, Dialect, QUOTE_MINIMAL
from typing import List, Optional, Dict


class CrLfDialect(Dialect):
    delimiter = ','
    quotechar = '"'
    doublequote = True
    skipinitialspace = True
    lineterminator = '\r\n'
    quoting = QUOTE_MINIMAL


register_dialect("crlf", CrLfDialect)


class LfDialect(Dialect):
    delimiter = ','
    quotechar = '"'
    doublequote = True
    skipinitialspace = True
    lineterminator = '\n'
    quoting = QUOTE_MINIMAL


register_dialect("lf", LfDialect)

PYTHON2 = sys.version_info < (3, 0)
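
A minimal, assumed usage sketch of the 'crlf' and 'lf' dialects registered above; the helper name, file name, and rows are invented, and only the unicodecsv API shown in the snippet is relied on.

# Hypothetical usage: unicodecsv encodes rows itself, so the target file is
# opened in binary mode. dialect='lf' ends rows with '\n', 'crlf' with '\r\n'.
def dump_rows(path, rows, crlf=False):
    # type: (str, List[List[str]], bool) -> None
    with open(path, 'wb') as f:
        writer = csv.writer(f, dialect='crlf' if crlf else 'lf',
                            encoding='utf-8')
        for row in rows:
            writer.writerow(row)

dump_rows('people.csv', [[u'name', u'city'], [u'André', u'São Paulo']])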