Example No. 1
    def _load(self):

        dir_info = self._data['connect_info'].get('directory')
        file_n = self._data['connect_info'].get('file_name')
        full_name = os.path.join(dir_info, file_n)

        i = 0
        with open(full_name, 'r') as txt_file:
            csv_d_rdr = csv.DictReader(txt_file)
            for r in csv_d_rdr:
                if i == 0:
                    if not Helper.is_empty(self._data['key_columns']):
                        if not Helper.is_column_list_valid(
                                self._data['key_columns'], r.keys()):
                            self._logger.error(
                                'Specified primary keys don\'t match table columns'
                            )
                            raise Exception
                self._add_row(r)
                i = i + 1

        if not Helper.is_empty(self._data['key_columns']):
            if not self._check_primary_key_constraint_for_first_load():
                self._rows = []
                self._logger.error(
                    'The specified primary keys don\'t comply with primary key constraint'
                )
                raise Exception

        self._logger.debug('CSVDataTable._load: Loaded ' +
                           str(len(self._rows)) + ' rows')
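
The loader validates the configured key_columns against the CSV header before ingesting rows. Helper.is_column_list_valid is not shown in this excerpt; a minimal sketch of the check it presumably performs (an assumption):

    def is_column_list_valid(column_list, table_columns):
        # Presumed behavior: every requested key column must exist among the table's columns.
        return set(column_list).issubset(set(table_columns))

    # Hypothetical usage against a csv.DictReader row:
    # is_column_list_valid(['playerID', 'yearID'], row.keys())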
Example No. 2
    def delete_by_template(self, template, limit=None):
        '''

        :param template: Template to determine rows to delete.
        :param limit: Optional maximum number of rows to delete; None deletes all matches.
        :return: Number of rows deleted.
        '''
        if not Helper.is_empty(template):
            if not Helper.is_template_valid(template, self.get_columns()):
                self._logger.error(
                    'Some columns in the specified template don\'t match table columns'
                )
                raise Exception

        rows = self.get_rows()
        r_indexes = []
        for i in range(0, len(rows)):
            if limit is not None:
                if len(r_indexes) == limit:
                    break

            if Helper.matches_template(rows[i], template):
                r_indexes.append(i)

        if len(r_indexes) == 0:
            return 0
        elif len(r_indexes) == 1:
            self._delete_row(r_indexes[0])
            return 1
        else:
            self._delete_rows(r_indexes)
            return len(r_indexes)
Example No. 3
    def delete_by_template(self, template):
        '''

        :param template: Template to determine rows to delete.
        :return: Number of rows deleted.
        '''
        template_string = ''
        if not Helper.is_empty(template):
            if not Helper.is_template_valid(template, self.get_columns()):
                self._logger.error(
                    'Some columns in the specified template don\'t match table columns'
                )
                raise Exception
            template_string = 'WHERE ' + self._compose_template_string(
                template)

        query = 'DELETE FROM ' + '`' + self._data[
            'table_name'] + '` ' + template_string
        with self._connection.cursor() as cursor:
            try:
                cursor.execute(query)
                if cursor.rowcount > 0:
                    if self._auto_commit:
                        self.commit()
                return cursor.rowcount
            except pymysql.Error as error:
                self._logger.error(
                    'Failed to delete record(s) in the table {}'.format(error))
                raise Exception
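
The DELETE statement above splices in a WHERE clause produced by _compose_template_string, which is not shown here. A minimal sketch of what it presumably produces, based on how its result is used in the query (an assumption):

    def compose_template_string(template):
        # Presumed behavior: one quoted equality per template field, joined with AND.
        return ' AND '.join("`{}`='{}'".format(column, value)
                            for column, value in template.items())

    # e.g. compose_template_string({'playerID': 'aardsda01', 'yearID': '2004'})
    # -> "`playerID`='aardsda01' AND `yearID`='2004'"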
Example No. 4
    def find_by_primary_key(self, key_fields, field_list=None):
        '''

        :param key_fields: The list with the values for the key_columns, in order, to use to find a record.
        :param field_list: A subset of the fields of the record to return.
        :return: None, or a dictionary containing the requested fields for the record identified
            by the key.
        '''
        if Helper.is_empty(self._data['key_columns']):
            self._logger.error('Table has no primary keys')
            raise Exception

        if not Helper.are_key_fields_valid(key_fields,
                                           self._data['key_columns']):
            self._logger.error('Key fields are not valid')
            raise Exception

        template = Helper.convert_key_fields_to_template(
            key_fields, self._data['key_columns'])
        result = self.find_by_template(template, field_list, 1)

        if Helper.is_empty(result):
            return None

        return result[0]
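
find_by_primary_key reduces to find_by_template by pairing the ordered key values with the table's key columns. Helper.convert_key_fields_to_template is not shown; a plausible sketch of that conversion (an assumption):

    def convert_key_fields_to_template(key_fields, key_columns):
        # Presumed behavior: zip the ordered key values onto the key column names.
        return dict(zip(key_columns, key_fields))

    # e.g. convert_key_fields_to_template(['aardsda01', '2004'], ['playerID', 'yearID'])
    # -> {'playerID': 'aardsda01', 'yearID': '2004'}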
Example No. 5
def main():
    arguments = DictObj(vars(ArgsParser.getArgv(ArgsParser.createParser())))
    # arguments = DictObj({"command": "dir", "file": "D:\PyProject\setterdog\src\\user_config.json", "debug": 0})
    # print(arguments)
    debug.getInstance(arguments.debug, "./logger.log", 0, 0)

    Configuration.getInstance(arguments.file, arguments.command)
    # print(Configuration.GI().config)

    """
    debug.getInstance(logging.getLevelName(Configuration.GI().config.debug.logLevel),
                      Configuration.GI().config.debug.fileName, 0, 0)"""

    Helper.createFolder(Configuration.GI().config.output.outputDir)
    Helper.createFolder(Configuration.GI().config.report.reportDir)

    outputFile = os.path.join(Configuration.GI().config.output.outputDir,
                              Configuration.GI().config.output.outputFileNamePattern.
                              format(date=Helper.getCurrentDate()))
    stdout = StdoutData(Configuration.GI().config.launchCommand, True, outputFile)

    a = Analyzer(Configuration.GI().config.pattern)
    a.calculatePatternMatches(stdout.getOutputData())

    report = Reporter(Configuration.GI().config.report.reportDir,
                      Configuration.GI().config.report.reportFileNamePattern.format(date=Helper.getCurrentDate()),
                      a.expandedPattern)
    report.writeReport()
    a.returnCode(stdout.returnCode)
Example No. 6
 def getOutputData(self):
     """
         @return {String} - returns output of command
     """
     try:
         process = Popen(self.command, stdout=PIPE)
         # output = str(process.stdout.read())  # Alternative for very large stdout
         (output, error) = process.communicate()  # Wait for the process and capture stdout/stderr
         self.returnCode = process.returncode
         if self.returnCode == 0:
             res = None
             if output:
                 res = str(output)
                 if self.writeToFile and self.fileName:
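                     # str(output) renders the bytes' newlines as the literal two
                     # characters "\n", so the split below targets that sequence.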
                     lines = res.split('\\n')
                     Helper.createNewFile(self.fileName,
                                          os.linesep.join(lines), 'w')
             return res
         else:
             debug.log().error(
                 "Command '{}' failed, exit-code = {}, error = {}".format(
                     self.command, self.returnCode, str(error)))
     except (ValueError, OSError, TypeError) as err:
         debug.log().critical(err)
         Helper.systemExit()
Example No. 7
 def checkValidConfigKey(self):
     """
         Check the availability of required keys
     """
     for obj in self.config.pattern:
         for key in self.validKeys:
             if key not in obj:
                 debug.log().critical(
                     "Not valid config pattern {}".format(obj))
                 Helper.systemExit()
Example No. 8
    def _violate_primary_key_constraint(self, new_keys_template):
        if Helper.is_empty(self._data['key_columns']):
            return False

        key_fields = Helper.extract_key_fields_from_template(
            new_keys_template, self._data['key_columns'])

        records = self.find_by_primary_key(key_fields)
        if Helper.is_empty(records):
            return False

        return True
Example No. 9
    def insert(self, new_record):
        '''

        :param new_record: A dictionary representing a row to add to the set of records.
        :return: None
        '''
        if not Helper.is_new_record_valid(new_record, self.get_columns()):
            self._logger.error('new_record must contain all columns')
            raise Exception

        column_strings = '('
        value_strings = '('
        for key in new_record:
            column_strings = column_strings + '`' + key + '`' + ', '
            value_strings = value_strings + '\'' + new_record[key] + '\'' + ', '

        column_strings = column_strings[:-2] + ')'
        value_strings = value_strings[:-2] + ')'

        with self._connection.cursor() as cursor:
            query = 'INSERT INTO ' + '`' + self._data[
                'table_name'] + '` ' + column_strings + ' VALUES ' + value_strings + ';'
            try:
                cursor.execute(query)
                if cursor.rowcount > 0:
                    if self._auto_commit:
                        self.commit()
            except pymysql.Error as error:
                self._logger.error(
                    'Failed to insert record(s) into the table {}'.format(
                        error))
                raise Exception
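
This insert interpolates the values directly into the SQL string, which breaks on values containing quotes and is open to SQL injection. A possible safer variant (a sketch, not the project's code) that routes the values through pymysql's %s parameter substitution:

    def insert_parameterized(connection, table_name, new_record, auto_commit=True):
        # Column names cannot be bound as parameters, so they are still interpolated;
        # the values, however, go through pymysql's placeholders.
        columns = ', '.join('`{}`'.format(column) for column in new_record)
        placeholders = ', '.join(['%s'] * len(new_record))
        query = 'INSERT INTO `{}` ({}) VALUES ({});'.format(table_name, columns,
                                                            placeholders)
        with connection.cursor() as cursor:
            cursor.execute(query, list(new_record.values()))
            if auto_commit:
                connection.commit()
            return cursor.rowcount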
Example No. 10
    def find_by_template(self,
                         template,
                         field_list=None,
                         limit=None,
                         offset=None,
                         order_by=None):
        '''

        :param template: A dictionary of the form { 'field1' : value1, 'field2': value2, ...}
        :param field_list: A list of requested fields of the form ['fielda', 'fieldb', ...]
        :param limit: Do not worry about this for now.
        :param offset: Do not worry about this for now.
        :param order_by: Do not worry about this for now.
        :return: A list of dictionaries, one for each record that matches the template. Each
            dictionary contains only the requested fields.
        '''
        template_string = ''
        if not Helper.is_empty(template):
            if not Helper.is_template_valid(template, self.get_columns()):
                self._logger.error(
                    'Some columns in the specified template don\'t match table columns'
                )
                raise Exception
            template_string = 'WHERE ' + self._compose_template_string(
                template)

        if Helper.is_empty(field_list):
            field_list_string = '*'
        else:
            field_list_string = self._compose_field_list_string(field_list)

        query = 'SELECT ' + field_list_string + ' FROM ' + '`' + self._data[
            'table_name'] + '` ' + template_string + ';'
        with self._connection.cursor() as cursor:
            try:
                cursor.execute(query)
                result = cursor.fetchall()

                if len(result) == 0:
                    return []
                return result
            except pymysql.Error as error:
                self._logger.error(
                    'Failed to find record(s) in the table {}'.format(error))
                raise Exception
Example No. 11
    def delete_by_key(self, key_fields):
        '''

        Deletes the record that matches the key.

        :param key_fields: The list of values for the key columns, in order, identifying the record to delete.
        :return: A count of the rows deleted.
        '''
        if Helper.is_empty(self._data['key_columns']):
            self._logger.error('Table has no primary keys')
            raise Exception

        if not Helper.are_key_fields_valid(key_fields,
                                           self._data['key_columns']):
            self._logger.error('Key fields are not valid')
            raise Exception

        template = Helper.convert_key_fields_to_template(
            key_fields, self._data['key_columns'])
        return self.delete_by_template(template)
Example No. 12
    def update_by_template(self, template, new_values):
        '''

        :param template: Template for rows to match.
        :param new_values: New values to set for matching fields.
        :return: Number of rows updated.
        '''
        if Helper.is_empty(new_values):
            return 0

        template_string = ''
        if not Helper.is_empty(template):
            if not Helper.is_template_valid(template, self.get_columns()):
                self._logger.error(
                    'Some columns in the specified template don\'t match table columns'
                )
                raise Exception
            template_string = 'WHERE ' + self._compose_template_string(
                template)

        update_string = ''
        for key in new_values:
            update_string = update_string + '`' + key + '`=\'' + new_values[
                key] + '\', '

        update_string = update_string[:-2]
        query = 'UPDATE ' + '`' + self._data[
            'table_name'] + '` SET ' + update_string + ' ' + template_string + ';'
        with self._connection.cursor() as cursor:
            try:
                cursor.execute(query)
                if cursor.rowcount > 0:
                    if self._auto_commit:
                        self.commit()
                return cursor.rowcount
            except pymysql.Error as error:
                self._logger.error(
                    'Failed to update record(s) in the table {}'.format(error))
                raise Exception
Example No. 13
    def update_by_key(self, key_fields, new_values):
        '''

        :param key_fields: List of values for the key fields, in order.
        :param new_values: A dict of field:value to set for updated row.
        :return: Number of rows updated.
        '''
        if Helper.is_empty(self._data['key_columns']):
            self._logger.error('Table has no primary keys')
            raise Exception

        if not Helper.are_key_fields_valid(key_fields,
                                           self._data['key_columns']):
            self._logger.error('Key fields are not valid')
            raise Exception

        if Helper.is_empty(new_values):
            return 0

        template = Helper.convert_key_fields_to_template(
            key_fields, self._data['key_columns'])
        return self.update_by_template(template, new_values)
Example No. 14
    def insert(self, new_record):
        '''

        :param new_record: A dictionary representing a row to add to the set of records.
        :return: None
        '''
        if not Helper.is_new_record_valid(new_record, self.get_columns()):
            self._logger.error('new_record must contain all columns')
            raise Exception

        if self._violate_primary_key_constraint(new_record):
            self._logger.error('Violates primary key constraint')
            raise Exception

        self._add_row(new_record)
Example No. 15
    def update_by_template(self, template, new_values, limit=None):
        '''

        :param template: Template for rows to match.
        :param new_values: New values to set for matching fields.
        :param limit: Optional maximum number of rows to update; None updates all matches.
        :return: Number of rows updated.
        '''
        if Helper.is_empty(new_values):
            return 0

        if not Helper.is_empty(template):
            if not Helper.is_template_valid(template, self.get_columns()):
                self._logger.error(
                    'Some columns in the specified template don\'t match table columns'
                )
                raise Exception

        # Extract any key columns and values present in new_values
        changed_keys = Helper.extract_key_columns_and_values_from_template(
            new_values, self._data['key_columns'])

        rows = self.get_rows()
        r_indexes = []
        for i in range(0, len(rows)):
            if limit is not None:
                if len(r_indexes) == limit:
                    break

            if Helper.matches_template(rows[i], template):
                # Apply changed_keys and check if modification would result in duplicate primary key
                if not Helper.is_empty(changed_keys):
                    # Very important to make copy of rows[i] so that it will not be altered
                    new_keys_template = Helper.change_keys(
                        copy.copy(rows[i]), changed_keys)

                    if self._violate_primary_key_constraint(new_keys_template):
                        self._logger.error('Violates primary key constraint')
                        raise Exception

                r_indexes.append(i)

        if len(r_indexes) == 0:
            return 0
        elif len(r_indexes) == 1:
            self._modify_row(r_indexes[0], new_values)
            return 1
        else:
            self._modify_rows(r_indexes, new_values)
            return len(r_indexes)
Example No. 16
    def find_by_template(self,
                         template,
                         field_list=None,
                         limit=None,
                         offset=None,
                         order_by=None):
        '''

        :param template: A dictionary of the form { 'field1' : value1, 'field2': value2, ...}
        :param field_list: A list of requested fields of the form ['fielda', 'fieldb', ...]
        :param limit: Optional maximum number of matching rows to return; None returns all matches.
        :param offset: Do not worry about this for now.
        :param order_by: Do not worry about this for now.
        :return: A list of dictionaries, one for each record that matches the template. Each
            dictionary contains only the requested fields.
        '''
        if not Helper.is_empty(template):
            if not Helper.is_template_valid(template, self.get_columns()):
                self._logger.error(
                    'Some columns in the specified template don\'t match table columns'
                )
                raise Exception

        if not Helper.is_empty(field_list):
            if not Helper.is_column_list_valid(field_list, self.get_columns()):
                self._logger.error(
                    'Some columns in the specified field_list don\'t match table columns'
                )
                raise Exception

        matching_rows = []
        for row in self.get_rows():
            if limit is not None:
                if len(matching_rows) == limit:
                    break

            if Helper.matches_template(row, template):
                matching_rows.append(
                    Helper.extract_needed_fields(field_list, row))

        return matching_rows
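
The in-memory scan leans on two helpers that are not shown. A minimal sketch of the behavior they presumably have, consistent with the tests in Example No. 19 (assumptions):

    def matches_template(row, template):
        # An empty template matches every row; otherwise each templated field must match.
        return all(row.get(column) == value for column, value in (template or {}).items())

    def extract_needed_fields(field_list, row):
        # With no field_list, return the whole row (mimics SELECT *).
        if not field_list:
            return dict(row)
        return {field: row[field] for field in field_list}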
Example No. 17
 def __init__(self, userConfig=None, launchCommand=None):
     """
         Class constructor
         @param userConfig {Dict|None} - [optional]:None
         @param launchCommand {String|None} - [optional]:None
     """
     super().__init__()
     self._defaultConfig = Helper.parseJson("src/default_config.json")
     if not self._defaultConfig:
         debug.log().critical("Default config doesn't exist")
         Helper.systemExit()
     # Config in current folder, which program was run.
     self._currentConfig = Helper.parseJson(
         os.path.join(os.getcwd(), "current.json"))
     self.config = Helper.MergeDict(self._defaultConfig,
                                    self._currentConfig)
     if userConfig:
         self._userConfig = Helper.parseJson(userConfig)
         self.config = Helper.MergeDict(self.config, self._userConfig)
     if launchCommand:
         self.config.launchCommand = launchCommand
     # Keys, which must be in each pattern
     self.validKeys = ["code", "regex", "name"]
     self.checkValidConfigKey()
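
The constructor layers three configs: the bundled defaults, a current-directory override, and an optional user file, with later layers winning. Helper.MergeDict is not shown; a minimal sketch of a recursive merge along the lines it presumably performs (an assumption; the real helper may also wrap the result for attribute access):

    def merge_dict(base, override):
        # Presumed behavior: values from `override` win; nested dicts are merged recursively.
        result = dict(base or {})
        for key, value in (override or {}).items():
            if isinstance(value, dict) and isinstance(result.get(key), dict):
                result[key] = merge_dict(result[key], value)
            else:
                result[key] = value
        return result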
Example No. 18
 def writeReport(self):
     Helper.toJsonFile(self.file, self.obj, 'w')
Example No. 19
def test_find_by_template(appearances_csv):
    # The function fails if there are invalid columns in the template or field_list
    with pytest.raises(Exception):
        assert appearances_csv.find_by_template({'wrong': '150'}, ['playerID'])
    with pytest.raises(Exception):
        assert appearances_csv.find_by_template({'playerID': 'aardsda01'},
                                                ['wrong'])

    # Example when some rows match the template
    label = [{
        'playerID': 'millake01',
        'teamID': 'BOS',
        'yearID': '2004'
    }, {
        'playerID': 'staubru01',
        'teamID': 'HOU',
        'yearID': '1963'
    }, {
        'playerID': 'wongko01',
        'teamID': 'SLN',
        'yearID': '2015'
    }]
    assert Helper.compare_two_list_of_dicts(
        appearances_csv.find_by_template(
            {
                'G_all': '150',
                'GS': '140',
                'G_ph': '7'
            }, ['playerID', 'teamID', 'yearID']), label)

    # Example when some rows match the template but no field_list is provided
    # All the fields are returned in this case; this mimics the database behavior of SELECT *
    label = [{
        'yearID': '2004',
        'teamID': 'BOS',
        'lgID': 'AL',
        'playerID': 'millake01',
        'G_all': '150',
        'GS': '140',
        'G_batting': '150',
        'G_defense': '137',
        'G_p': '0',
        'G_c': '0',
        'G_1b': '69',
        'G_2b': '0',
        'G_3b': '0',
        'G_ss': '0',
        'G_lf': '20',
        'G_cf': '0',
        'G_rf': '55',
        'G_of': '74',
        'G_dh': '8',
        'G_ph': '7',
        'G_pr': '0'
    }, {
        'yearID': '1963',
        'teamID': 'HOU',
        'lgID': 'NL',
        'playerID': 'staubru01',
        'G_all': '150',
        'GS': '140',
        'G_batting': '150',
        'G_defense': '144',
        'G_p': '0',
        'G_c': '0',
        'G_1b': '109',
        'G_2b': '0',
        'G_3b': '0',
        'G_ss': '0',
        'G_lf': '0',
        'G_cf': '0',
        'G_rf': '49',
        'G_of': '49',
        'G_dh': '0',
        'G_ph': '7',
        'G_pr': '1'
    }, {
        'yearID': '2015',
        'teamID': 'SLN',
        'lgID': 'NL',
        'playerID': 'wongko01',
        'G_all': '150',
        'GS': '140',
        'G_batting': '150',
        'G_defense': '147',
        'G_p': '0',
        'G_c': '0',
        'G_1b': '0',
        'G_2b': '147',
        'G_3b': '0',
        'G_ss': '0',
        'G_lf': '0',
        'G_cf': '0',
        'G_rf': '0',
        'G_of': '0',
        'G_dh': '0',
        'G_ph': '7',
        'G_pr': '0'
    }]
    assert Helper.compare_two_list_of_dicts(
        appearances_csv.find_by_template({
            'G_all': '150',
            'GS': '140',
            'G_ph': '7'
        }), label)

    # No rows match the template, returning an empty list
    assert appearances_csv.find_by_template({
        'G_all': '123',
        'GS': '250'
    }, ['playerID', 'yearID']) == []
Example No. 20
    def collect(self, dir, conf):
        Helper.collect(self, dir, conf)

        try:
            self.total_authors = int(
                getpipeoutput(['git log', 'git shortlog -s', 'wc -l']))
        except Exception:
            self.total_authors = 0
        # self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))

        self.activity_by_hour_of_day = {}  # hour -> commits
        self.activity_by_day_of_week = {}  # day -> commits
        self.activity_by_month_of_year = {}  # month [1-12] -> commits
        self.activity_by_hour_of_week = {}  # weekday -> hour -> commits
        self.activity_by_hour_of_day_busiest = 0
        self.activity_by_hour_of_week_busiest = 0
        self.activity_by_year_week = {}  # yy_wNN -> commits
        self.activity_by_year_week_peak = 0
        self.activity_by_every_day = {}  #

        self.authors = {
        }  # name -> {commits, first_commit_stamp, last_commit_stamp, last_active_day, active_days, lines_added, lines_removed}

        # domains
        self.domains = {}  # domain -> commits

        # author of the month
        self.author_of_month = {}  # month -> author -> commits
        self.author_of_year = {}  # year -> author -> commits
        self.author_of_day = {}  # year -> author -> commits
        self.commits_by_month = {}  # month -> commits
        self.commits_by_year = {}  # year -> commits
        self.commits_by_day = {}  # year -> commits
        self.first_commit_stamp = 0
        self.last_commit_stamp = 0
        self.last_active_day = None
        self.active_days = set()

        # lines
        self.total_lines = 0
        self.total_lines_added = 0
        self.total_lines_removed = 0

        # timezone
        self.commits_by_timezone = {}  # timezone -> commits

        # tags
        self.tags = {}
        lines = getpipeoutput(['git show-ref --tags']).split('\n')
        for line in lines:
            if len(line) == 0:
                continue
            (hash, tag) = line.split(' ')

            tag = tag.replace('refs/tags/', '')
            output = getpipeoutput(
                ['git log "%s" --pretty=format:"%%at %%an" -n 1' % hash])

            if len(output) > 0:
                parts = output.split(' ')
                stamp = 0
                try:
                    stamp = int(parts[0])
                except ValueError:
                    stamp = 0
                self.tags[tag] = {
                    'stamp': stamp,
                    'hash': hash,
                    'date': datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'),
                    'commits': 0,
                    'authors': {}
                }

        # collect info on tags, starting from latest
        tags_sorted_by_date_desc = list(
            map(
                lambda el: el[1],
                reversed(
                    sorted(
                        map(lambda el: (el[1]['date'], el[0]),
                            self.tags.items())))))
        prev = None
        for tag in reversed(tags_sorted_by_date_desc):
            cmd = 'git shortlog -s "%s"' % tag
            if prev != None:
                cmd += ' "^%s"' % prev
            output = getpipeoutput([cmd])
            if len(output) == 0:
                continue
            prev = tag
            for line in output.split('\n'):
                parts = re.split(r'\s+', line, 2)
                commits = int(parts[1])
                author = parts[2]
                self.tags[tag]['commits'] += commits
                self.tags[tag]['authors'][author] = commits

        # Collect revision statistics
        # Outputs "<stamp> <date> <time> <timezone> <author> '<' <mail> '>'"
        lines = getpipeoutput([
            'git rev-list --pretty=format:"%at %ai %an <%aE>" HEAD',
            'grep -v ^commit'
        ]).split('\n')
        for line in lines:
            parts = line.split(' ', 4)
            author = ''
            try:
                stamp = int(parts[0])
            except ValueError:
                stamp = 0
            timezone = parts[3]
            author, mail = parts[4].split('<', 1)
            author = author.rstrip()
            mail = mail.rstrip('>')
            domain = '?'
            if mail.find('@') != -1:
                domain = mail.rsplit('@', 1)[1]
            date = datetime.datetime.fromtimestamp(float(stamp))

            # First and last commit stamp
            if self.last_commit_stamp == 0:
                self.last_commit_stamp = stamp
            self.first_commit_stamp = stamp

            # activity
            # hour
            hour = date.hour
            self.activity_by_hour_of_day[
                hour] = self.activity_by_hour_of_day.get(hour, 0) + 1
            # most active hour?
            if self.activity_by_hour_of_day[
                    hour] > self.activity_by_hour_of_day_busiest:
                self.activity_by_hour_of_day_busiest = self.activity_by_hour_of_day[
                    hour]

            # day of week
            day = date.weekday()
            self.activity_by_day_of_week[
                day] = self.activity_by_day_of_week.get(day, 0) + 1

            # domain stats
            if domain not in self.domains:
                self.domains[domain] = {}
            # commits
            self.domains[domain]['commits'] = self.domains[domain].get(
                'commits', 0) + 1

            # hour of week
            if day not in self.activity_by_hour_of_week:
                self.activity_by_hour_of_week[day] = {}
            self.activity_by_hour_of_week[day][
                hour] = self.activity_by_hour_of_week[day].get(hour, 0) + 1
            # most active hour?
            if self.activity_by_hour_of_week[day][
                    hour] > self.activity_by_hour_of_week_busiest:
                self.activity_by_hour_of_week_busiest = self.activity_by_hour_of_week[
                    day][hour]

            # month of year
            month = date.month
            self.activity_by_month_of_year[
                month] = self.activity_by_month_of_year.get(month, 0) + 1

            # yearly/weekly activity
            yyw = date.strftime('%Y-%W')
            self.activity_by_year_week[yyw] = self.activity_by_year_week.get(
                yyw, 0) + 1
            if self.activity_by_year_week_peak < self.activity_by_year_week[
                    yyw]:
                self.activity_by_year_week_peak = self.activity_by_year_week[
                    yyw]

            # author stats
            if author not in self.authors:
                self.authors[author] = {}
            # commits
            if 'last_commit_stamp' not in self.authors[author]:
                self.authors[author]['last_commit_stamp'] = stamp
            self.authors[author]['first_commit_stamp'] = stamp
            self.authors[author]['commits'] = self.authors[author].get(
                'commits', 0) + 1

            # author of the month/year
            yymm = date.strftime('%Y-%m')
            if yymm in self.author_of_month:
                self.author_of_month[yymm][
                    author] = self.author_of_month[yymm].get(author, 0) + 1
            else:
                self.author_of_month[yymm] = {}
                self.author_of_month[yymm][author] = 1
            self.commits_by_month[yymm] = self.commits_by_month.get(yymm,
                                                                    0) + 1

            yy = date.year
            if yy in self.author_of_year:
                self.author_of_year[yy][author] = self.author_of_year[yy].get(
                    author, 0) + 1
            else:
                self.author_of_year[yy] = {}
                self.author_of_year[yy][author] = 1
            self.commits_by_year[yy] = self.commits_by_year.get(yy, 0) + 1

            # authors: active days
            yymmdd = date.strftime('%Y-%m-%d')
            if 'last_active_day' not in self.authors[author]:
                self.authors[author]['last_active_day'] = yymmdd
                self.authors[author]['active_days'] = 1
            elif yymmdd != self.authors[author]['last_active_day']:
                self.authors[author]['last_active_day'] = yymmdd
                self.authors[author]['active_days'] += 1

            # project: active days
            if yymmdd != self.last_active_day:
                self.last_active_day = yymmdd
                self.active_days.add(yymmdd)

            # timezone
            self.commits_by_timezone[timezone] = self.commits_by_timezone.get(
                timezone, 0) + 1

        # TODO Optimize this, it's the worst bottleneck
        # outputs "<stamp> <files>" for each revision
        self.files_by_stamp = {}  # stamp -> files
        revlines = getpipeoutput(
            ['git rev-list --pretty=format:"%at %T" HEAD',
             'grep -v ^commit']).strip().split('\n')
        lines = []
        for revline in revlines:
            time, rev = revline.split(' ')
            linecount = self.getFilesInCommit(rev)
            lines.append('%d %d' % (int(time), linecount))

        self.total_commits = len(lines)
        for line in lines:
            parts = line.split(' ')
            if len(parts) != 2:
                continue
            (stamp, files) = parts[0:2]
            try:
                self.files_by_stamp[int(stamp)] = int(files)
            except ValueError:
                Log.warning('Warning: failed to parse line "%s"' % line)

        # extensions
        self.extensions = {}  # extension -> files, lines
        lines = getpipeoutput(['git ls-tree -r -z HEAD']).split('\000')
        self.total_files = len(lines)
        for line in lines:
            if len(line) == 0:
                continue
            parts = re.split(r'\s+', line, 4)
            sha1 = parts[2]
            filename = parts[3]

            if filename.find('.') == -1 or filename.rfind('.') == 0:
                ext = ''
            else:
                ext = filename[(filename.rfind('.') + 1):]
            if len(ext) > conf['max_ext_length']:
                ext = ''

            if ext not in self.extensions:
                self.extensions[ext] = {'files': 0, 'lines': 0}

            self.extensions[ext]['files'] += 1
            try:
                self.extensions[ext]['lines'] += self.getLinesInBlob(sha1)
            except Exception:
                Log.warning('Warning: Could not count lines for file "%s"' %
                            line)

        # line statistics
        # outputs:
        #  N files changed, N insertions (+), N deletions(-)
        # <stamp> <author>
        self.changes_by_date = {}  # stamp -> { files, ins, del }

        # self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
        lines = self.getLinesByTime()
        files = 0
        inserted = 0
        deleted = 0
        total_lines = 0
        author = None
        for line in lines:
            if len(line) == 0:
                continue

            # <stamp> <author>
            if line.find('files changed,') == -1:
                pos = line.find(' ')
                if pos != -1:
                    try:
                        (stamp, author) = (int(line[:pos]), line[pos + 1:])
                        self.changes_by_date[stamp] = {
                            'files': files,
                            'ins': inserted,
                            'del': deleted,
                            'lines': total_lines
                        }
                        if author not in self.authors:
                            self.authors[author] = {
                                'lines_added': 0,
                                'lines_removed': 0
                            }
                        self.authors[author][
                            'lines_added'] = self.authors[author].get(
                                'lines_added', 0) + inserted
                        self.authors[author][
                            'lines_removed'] = self.authors[author].get(
                                'lines_removed', 0) + deleted
                    except ValueError:
                        Log.warning('Warning: unexpected line "%s"' % line)
                else:
                    Log.warning('Warning: unexpected line "%s"' % line)
            else:
                numbers = re.findall(r'\d+', line)
                if len(numbers) == 3:
                    (files, inserted, deleted) = map(lambda el: int(el),
                                                     numbers)
                    total_lines += inserted
                    total_lines -= deleted
                    self.total_lines_added += inserted
                    self.total_lines_removed += deleted
                else:
                    Log.warning('Warning: failed to handle line "%s"' % line)
                    (files, inserted, deleted) = (0, 0, 0)
        self.total_lines = total_lines
        # Tally each author's workload for the last day
        # Initialize the author_of_day dict
        for name in self.authors.keys():
            self.author_of_day[name] = {}
            self.author_of_day[name]['lines_added'] = 0
            self.author_of_day[name]['lines_removed'] = 0
            self.author_of_day[name]['commit'] = 0
        today = datetime.date.today()
        yesterday = today - datetime.timedelta(days=1)
        lines = self.getLinesByTime(yesterday.strftime('%Y-%m-%d'),
                                    today.strftime('%Y-%m-%d'))
        for line in lines:
            if len(line) == 0:
                continue
            # <stamp> <author>
            if line.find('files changed,') == -1:
                pos = line.find(' ')
                if pos != -1:
                    try:
                        (stamp, author) = (int(line[:pos]), line[pos + 1:])
                        if author in self.authors:
                            self.author_of_day[author][
                                'commit'] = self.author_of_day[author].get(
                                    'commit', 0) + 1
                            self.author_of_day[author][
                                'lines_added'] = self.author_of_day[
                                    author].get('lines_added', 0) + inserted
                            self.author_of_day[author][
                                'lines_removed'] = self.author_of_day[
                                    author].get('lines_removed', 0) + deleted
                    except ValueError:
                        Log.warning('Warning: unexpected line "%s"' % line)
                else:
                    Log.warning('Warning: unexpected line "%s"' % line)
        self.total_lines = total_lines
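
The whole collector is driven by getpipeoutput, which takes a list of command fragments and treats them as a shell pipeline. The function itself is not part of this excerpt; a minimal sketch of what it presumably does (an assumption modeled on gitstats-style scripts):

    import subprocess

    def getpipeoutput(cmds):
        # Presumed behavior: join the fragments into one shell pipeline and
        # return its stdout as stripped text.
        pipeline = ' | '.join(cmds)
        completed = subprocess.run(pipeline, shell=True, capture_output=True, text=True)
        return completed.stdout.rstrip('\n')

    # e.g. getpipeoutput(['git log', 'git shortlog -s', 'wc -l'])
    # counts the distinct authors reported by `git shortlog`.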
Example No. 21
"""
Created on Tue Jul 16 15:11:30 2019

@author: d2gu53
"""

#### Neural Net in Python

import numpy as np
from sklearn import datasets
from sklearn import linear_model
import sklearn
from matplotlib import pyplot as plt
from src.Helper import Helper

helper = Helper()

np.random.seed(0)
X, y = datasets.make_moons(1000, noise=0.3)

clf = sklearn.linear_model.LogisticRegression()
clf.fit(X, y)  ### fits and overwrites the clf object in place

plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)
X = helper.plot_decision_boundary(lambda x: clf.predict(x), X)
plt.show()

### implement back propagation
num_examples = len(X)  # size training set
nn_input_dim = 2  # input dimensionality
nn_output_dim = 2  # output dimensionality
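
The snippet stops after fixing the layer sizes. A minimal sketch (not from the original source) of the forward pass this setup typically leads to, with an assumed hidden-layer size and tanh/softmax activations, and assuming X is still the (n, 2) moons array:

    nn_hdim = 3  # assumed hidden-layer size
    W1 = np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim)
    b1 = np.zeros((1, nn_hdim))
    W2 = np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim)
    b2 = np.zeros((1, nn_output_dim))

    # Forward pass: hidden tanh layer, then softmax over the two output classes.
    z1 = X.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    exp_scores = np.exp(z2)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)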