def reset(self, data_folder, file_name, suffix=_default_suffix, variables=None):
        """Reset the aggregator class and clear design.

        All design settings (dependent/independent variables, exclusions,
        computes, recodings, subject variables and added data) are emptied.
        The data folder is then scanned and every matching file is read once
        to verify that all files define the same variables.

        Parameters
        ----------
        data_folder : str
            folder which contains the data of the subjects
        file_name : str
            name of the files. All files that start with this name
            will be considered for the analysis (cf. aggregator.data_files)
        suffix : str, optional
            if specified only files that end with this particular suffix
            will be considered (default=.xpd)
        variables : array of str, optional
            array of variable names, process only the specified variables

        Raises
        ------
        RuntimeError
            if a data file defines other variables than the first
            matching file
        Exception
            if no matching data file is found in the folder

        """

        self._data_folder = data_folder
        self._file_name = file_name
        self._data_files = []
        self._variables = []
        self._dv = []
        self._dv_txt = []
        self._iv = []
        self._iv_txt = []
        self._exclusions = []
        self._exclusions_txt = []
        self._computes = []
        self._computes_txt = []
        self._recode_txt = []
        self._recode = []
        self._subject_variables = []
        self._last_data = []
        self._added_data = []
        self._added_variables = []
        self._suffix = suffix

        for flname in _os.listdir(_os.path.dirname(self._data_folder + "/")):
            if not (flname.endswith(self._suffix) and
                    flname.startswith(self._file_name)):
                continue
            _data, vnames, _subject_info, _comments = read_datafile(
                self._data_folder + "/" + flname, read_variables=variables)

            if not self._variables:
                # the first matching file defines the reference variables
                self._variables = vnames
            elif vnames != self._variables:
                # name the offending file (the placeholder was missing
                # before, so the file name never reached the message)
                message = u"Different variables in {0}".format(flname)
                message = message + u"\n{0}".format(vnames)
                message = message + u"\ninstead of\n{0}".format(
                    self._variables)
                raise RuntimeError(_unicode2str(message))
            self._data_files.append(flname)

        if not self._data_files:
            raise Exception("No data files found in {0}".format(
                _unicode2str(self._data_folder)))

        print("found {0} subject_data sets".format(len(self._data_files)))
        print("found {0} variables: {1}".format(
            len(self._variables), [_unicode2str(x) for x in self._variables]))
 def _get_variable_id(self, variables, throw_exception=False):
     for cnt, v in enumerate(self.variables):
         if variables == v:
             return cnt
     if throw_exception:
         raise RuntimeError("Unknown variable name '{0}'".format(_unicode2str(variables)))
     return None
Example #3
0
    def print_n_trials(self, variables):
        """Print the number of trials in the combinations of the independent
        variables.

        Notes
        -----
        The function is for instance useful to quickly check the experimental
        design. The currently defined dependent and independent variables
        are replaced temporarily and restored afterwards, even if the
        aggregation fails (e.g. because of an unknown variable name).

        Parameters
        ----------
        variables : str or list
            A string or a list of strings that represent the names of one or
            more data variables (aggregator.variables)

        """

        old_iv = self._iv
        old_dv = self._dv
        try:
            self.set_dependent_variables("n_trials")
            self.set_independent_variables(variables)
            result, varnames = self.aggregate()
            for row in result:
                # first column identifies the subject
                print("Subject {0}".format(row[0]))
                for cnt, var in enumerate(varnames):
                    if cnt == 0:
                        continue
                    if isinstance(row[cnt], unicode):
                        _row_data = _unicode2str(row[cnt])
                    else:
                        _row_data = row[cnt]
                    # var[4:] drops the first four characters of the column
                    # name (presumably a prefix added by aggregate() --
                    # TODO confirm)
                    print("\t{0}:\t{1}".format(var[4:], _row_data))
            print("\n")
        finally:
            # never leave the temporary design behind
            self._dv = old_dv
            self._iv = old_iv
    def print_n_trials(self, variables):
        """Print the number of trials in the combinations of the independent
        variables.

        Notes
        -----
        The function is for instance useful to quickly check the experimental
        design. The dependent and independent variables of the current design
        are swapped out only for the duration of the call; they are put back
        in a ``finally`` clause so that a failing aggregation cannot leave
        the temporary "n_trials" design behind.

        Parameters
        ----------
        variables : str or list
            A string or a list of strings that represent the names of one or
            more data variables (aggregator.variables)

        """

        saved_iv, saved_dv = self._iv, self._dv
        try:
            self.set_dependent_variables("n_trials")
            self.set_independent_variables(variables)
            result, varnames = self.aggregate()
            for row in result:
                print("Subject {0}".format(row[0]))
                # column 0 is the subject column printed above
                for cnt in range(1, len(varnames)):
                    cell = row[cnt]
                    if isinstance(cell, unicode):
                        cell = _unicode2str(cell)
                    # the first four characters of the column name are
                    # dropped (presumably a prefix added by aggregate() --
                    # TODO confirm)
                    print("\t{0}:\t{1}".format(varnames[cnt][4:], cell))
            print("\n")
        finally:
            self._dv = saved_dv
            self._iv = saved_iv
    def _add_exclusion(self, relation_syntax):
        """Add an exclusion."""

        relation = self._parse_syntax(relation_syntax, throw_exception=True)
        if relation[1] in self._relations:
            self._exclusions.append(relation)
        else:
            raise RuntimeError("Incorrect exclusion syntax: '{0}'".format(_unicode2str(relation_syntax)))
Example #6
0
 def _add_dependent_variable(self, variable):
     if variable == "n_trials":
         self._dv.append([variable, 0])
     else:
         tmp = variable.replace(")", "").split("(")
         dv_fnc = tmp[0].strip()
         try:
             dv_txt = tmp[1].strip()
         except:
             raise RuntimeError("Incorrect syntax for DV: '{0}'".format(
                 _unicode2str(variable)))
         var_id = self._get_variable_id(dv_txt, True)
         if dv_fnc in self._dv_functions:
             self._dv.append([dv_fnc, var_id])
         else:
             raise RuntimeError("Unknown function for dependent variable:" +
                                " '{0}'".format(_unicode2str(dv_fnc)))
Example #7
0
 def _get_variable_id(self, variables, throw_exception=False):
     for cnt, v in enumerate(self.variables):
         if variables == v:
             return cnt
     if (throw_exception):
         raise RuntimeError("Unknown variable name '{0}'".format(
             _unicode2str(variables)))
     return None
Example #8
0
    def _add_exclusion(self, relation_syntax):
        """Add an exclusion."""

        relation = self._parse_syntax(relation_syntax, throw_exception=True)
        if relation[1] in self._relations:
            self._exclusions.append(relation)
        else:
            raise RuntimeError("Incorrect exclusion syntax: '{0}'".format(
                _unicode2str(relation_syntax)))
 def _add_dependent_variable(self, variable):
     if variable == "n_trials":
         self._dv.append([variable, 0])
     else:
         tmp = variable.replace(")", "").split("(")
         dv_fnc = tmp[0].strip()
         try:
             dv_txt = tmp[1].strip()
         except:
             raise RuntimeError(
                 "Incorrect syntax for DV: '{0}'".format(
                     _unicode2str(variable)))
         var_id = self._get_variable_id(dv_txt, True)
         if dv_fnc in self._dv_functions:
             self._dv.append([dv_fnc, var_id])
         else:
             raise RuntimeError("Unknown function for dependent variable:" +
                                " '{0}'".format(_unicode2str(dv_fnc)))
Example #10
0
def write_csv_file(filename, data, varnames=None, delimiter=','):
    """Write 2D data array to csv file.

    The file starts with a "# -*- coding: ... -*-" comment line naming
    the default locale encoding (UTF-8 if it cannot be determined),
    followed by an optional line of variable names and one line per data
    row. Progress information is written to stdout.

    Parameters
    ----------
    filename : str
        name (fullpath) of the data file
    data : list of list
        2D array with data (list of list)
    varnames : list of str, optional
        array of strings representing variable names
    delimiter : str, optional
        delimiter character (default=",")

    """

    _sys.stdout.write("write file: {0}".format(filename))
    try:
        _locale_enc = _locale.getdefaultlocale()[1]
    except ValueError:
        # raised for locale settings that cannot be parsed
        _locale_enc = None
    if _locale_enc is None:
        # avoid a "coding: None" header
        _locale_enc = "UTF-8"
    with open(filename, 'w') as f:
        f.write("# -*- coding: {0} -*-\n".format(_locale_enc))
        if varnames is not None:
            for c, v in enumerate(varnames):
                if c > 0:
                    f.write(delimiter)
                f.write(_unicode2str(v))
            f.write("\n")
        cnt = 0
        for row in data:
            for c, v in enumerate(row):
                if c > 0:
                    f.write(delimiter)
                if isinstance(v, unicode):
                    # keep the converted value (the conversion result used
                    # to be thrown away)
                    v = _unicode2str(v)
                elif not isinstance(v, str):
                    # numbers etc. cannot be passed to f.write() directly
                    v = str(v)
                f.write(v)
                cnt += 1
            f.write("\n")

    print(" ({0} cells in {1} rows)".format(cnt, len(data)))
Example #11
0
def write_csv_file(filename, data, varnames=None, delimiter=','):
    """Write 2D data array to csv file.

    Output layout: a "# -*- coding: ... -*-" comment line with the
    default locale encoding (UTF-8 as fallback), an optional header line
    with the variable names, then one delimiter-separated line per row
    of ``data``. A short progress message is written to stdout.

    Parameters
    ----------
    filename : str
        name (fullpath) of the data file
    data : list of list
        2D array with data (list of list); cells that are not strings
        are converted with ``str``
    varnames : list of str, optional
        array of strings representing variable names
    delimiter : str, optional
        delimiter character (default=",")

    """

    _sys.stdout.write("write file: {0}".format(filename))
    try:
        _locale_enc = _locale.getdefaultlocale()[1]
    except ValueError:
        # unparsable locale settings
        _locale_enc = None
    # getdefaultlocale() may also report no encoding at all
    _locale_enc = _locale_enc or "UTF-8"

    cnt = 0
    with open(filename, 'w') as f:
        f.write("# -*- coding: {0} -*-\n".format(_locale_enc))
        if varnames is not None:
            for c, v in enumerate(varnames):
                if c > 0:
                    f.write(delimiter)
                f.write(_unicode2str(v))
            f.write("\n")
        for row in data:
            for c, v in enumerate(row):
                if c > 0:
                    f.write(delimiter)
                if isinstance(v, unicode):
                    # the converted text must be what gets written; the
                    # result of the conversion was previously discarded
                    cell = _unicode2str(v)
                elif isinstance(v, str):
                    cell = v
                else:
                    # f.write() only accepts strings
                    cell = str(v)
                f.write(cell)
                cnt += 1
            f.write("\n")

    print(" ({0} cells in {1} rows)".format(cnt, len(data)))
    def _add_compute_variable(self, compute_syntax):
        """Add a new variable to be computed."""

        tmp = compute_syntax.replace("==", "@@")  # avoid confusion = & ==
        tmp = tmp.replace("!=", "##")  # avoid confusion = & ==
        tmp = tmp.split("=")
        variable_name = tmp[0].strip()
        try:
            syntax = tmp[1].strip()
            syntax = syntax.replace("@@", "==")
            syntax = syntax.replace("##", "==")
        except:
            raise RuntimeError("Incorrect compute syntax: '{0}'".format(_unicode2str(compute_syntax)))

        variable_def = self._parse_syntax(syntax, throw_exception=True)
        if variable_def is None:
            variable_def = self._parse_operation(syntax, throw_exception=True)
        if self._get_variable_id(variable_name) is not None:
            raise RuntimeError("Variable already defined '{0}'".format(_unicode2str(variable_name)))
        else:
            self._variables.append(variable_name)
            self._computes.append([variable_name, variable_def])
Example #13
0
    def _add_compute_variable(self, compute_syntax):
        """Add a new variable to be computed."""

        tmp = compute_syntax.replace("==", "@@")  # avoid confusion = & ==
        tmp = tmp.replace("!=", "##")  # avoid confusion = & ==
        tmp = tmp.split("=")
        variable_name = tmp[0].strip()
        try:
            syntax = tmp[1].strip()
            syntax = syntax.replace("@@", "==")
            syntax = syntax.replace("##", "==")
        except:
            raise RuntimeError("Incorrect compute syntax: '{0}'".format(
                _unicode2str(compute_syntax)))

        variable_def = self._parse_syntax(syntax, throw_exception=True)
        if variable_def is None:
            variable_def = self._parse_operation(syntax, throw_exception=True)
        if self._get_variable_id(variable_name) is not None:
            raise RuntimeError("Variable already defined '{0}'".format(
                _unicode2str(variable_name)))
        else:
            self._variables.append(variable_name)
            self._computes.append([variable_name, variable_def])
    def _add_variable_recoding(self, recode_syntax):
        """Add a new variable recoding rule."""

        error = False
        tmp = recode_syntax.split(":")
        if len(tmp) == 2:
            var_id = self._get_variable_id(tmp[0].strip(), True)
            excl_array = []
            for rule in tmp[1].split(","):
                rule = rule.split("=")
                if len(rule) == 2:
                    excl_array.append([rule[0].strip(), rule[1].strip()])
                else:
                    error = True
        else:
            error = True

        if error:
            raise RuntimeError("Incorrect recoding syntax: '{0}'".format(_unicode2str(recode_syntax)))
        else:
            self._recode.append([var_id, excl_array])
Example #15
0
    def _add_variable_recoding(self, recode_syntax):
        """Add a new variable recoding rule."""

        error = False
        tmp = recode_syntax.split(":")
        if len(tmp) == 2:
            var_id = self._get_variable_id(tmp[0].strip(), True)
            excl_array = []
            for rule in tmp[1].split(","):
                rule = rule.split("=")
                if len(rule) == 2:
                    excl_array.append([rule[0].strip(), rule[1].strip()])
                else:
                    error = True
        else:
            error = True

        if error:
            raise RuntimeError("Incorrect recoding syntax: '{0}'".format(
                _unicode2str(recode_syntax)))
        else:
            self._recode.append([var_id, excl_array])
    def _parse_syntax(self, syntax, throw_exception):
        """Preprocess relation and operation syntax.

        Returns relation array.

        """

        rels_ops = _copy(self._relations)
        rels_ops.extend(self._operations)
        found = None
        for ro in rels_ops:
            if syntax.find(ro) > 0:
                found = ro
                break
        if found is None:
            if throw_exception:
                raise RuntimeError("Incorrect syntax: '{0}'".format(_unicode2str(syntax)))
            else:
                return None
        else:
            syntax = syntax.split(found)
            var_id = self._get_variable_id(syntax[0].strip(), True)
            return [var_id, found, syntax[1].strip()]
Example #17
0
    def _parse_syntax(self, syntax, throw_exception):
        """Preprocess relation and operation syntax.

        Returns relation array.

        """

        rels_ops = _copy(self._relations)
        rels_ops.extend(self._operations)
        found = None
        for ro in rels_ops:
            if syntax.find(ro) > 0:
                found = ro
                break
        if found is None:
            if throw_exception:
                raise RuntimeError("Incorrect syntax: '{0}'".format(
                    _unicode2str(syntax)))
            else:
                return None
        else:
            syntax = syntax.split(found)
            var_id = self._get_variable_id(syntax[0].strip(), True)
            return [var_id, found, syntax[1].strip()]
Example #18
0
    def get_data(self, filename, recode_variables=True,
                 compute_new_variables=True, exclude_trials=True):
        """Read data from a single Expyriment data file.

        Notes
        -----
        The function can be only applied on data of aggregator.data_files,
        that is, on the files in the defined data folder that start with
        the experiment name. According to the defined design, the result
        contains recoded data together with the new computed variables, and the
        subject variables from the headers of the Expyriment data files.

        Parameters
        ----------
        filename : str
            name of the Expyriment data file
        recode_variables : bool, optional
            set to False if defined variable recodings should not be applied
            (default=True)
        compute_new_variables : bool, optional
            set to False if new defined variables should not be computed
            (default=True)
        exclude_trials : bool, optional
            set to False if exclusion rules should not be applied
            (default=True)

        Returns
        -------
        data : numpy.array
        var_names : list
            list of variable names
        info : str
            subject info
        comment : str
            comments in data

        Raises
        ------
        RuntimeError
            if the file is not one of aggregator.data_files, if a computed
            variable uses non-numeric data or an unsupported operation

        """

        # check filename
        if filename not in self._data_files:
            raise RuntimeError("'{0}' is not in the data list\n".format(
                _unicode2str(filename)))

        data, _vnames, subject_info, comments = \
            read_datafile(self._data_folder + "/" + filename)
        print("   reading {0}".format(_unicode2str(filename)))

        if recode_variables:
            # rules are applied one after the other on the raw rows
            for var_id, recoding in self._recode:
                for old, new in recoding:
                    for row in data:
                        if row[var_id] == old:
                            row[var_id] = new

        data = _np.array(data, dtype='|U99')
        # compute new defined variables and append
        if compute_new_variables:
            for new_var_name, var_def in self._computes:
                if var_def[1] in self._relations:
                    # relations are true or false
                    col = _np.zeros([data.shape[0], 1], dtype=int)
                    idx = self._find_idx(data, var_def[0],
                                         var_def[1], var_def[2])
                    col[idx, 0] = 1
                else:
                    # operations
                    try:
                        a = _np.float64([data[:, var_def[0]]]).transpose()
                        second_var_id = self._get_variable_id(var_def[2],
                                                              False)
                        if second_var_id is not None:
                            b = _np.float64(
                                [data[:, second_var_id]]).transpose()
                        else:
                            b = _np.float64(var_def[2])
                    except (TypeError, ValueError):
                        # the formatted text must be raised (the result of
                        # format() used to be discarded)
                        msg = "Error while computing new variable {0}. " + \
                              "Non-number in variables of {1}"
                        raise RuntimeError(msg.format(
                            _unicode2str(new_var_name),
                            _unicode2str(filename)))
                    if var_def[1] == "+":
                        col = a + b
                    elif var_def[1] == "-":
                        col = a - b
                    elif var_def[1] == "*":
                        col = a * b
                    elif var_def[1] == "/":
                        col = a / b
                    elif var_def[1] == "%":
                        col = a % b
                    else:
                        # never append an undefined or stale column
                        raise RuntimeError(
                            "Unknown operation '{0}' for new variable "
                            "{1}".format(_unicode2str(var_def[1]),
                                         _unicode2str(new_var_name)))
                data = _np.concatenate((data, col), axis=1)

        # add subject information
        for sv in self.subject_variables:
            try:
                info = subject_info[sv]
            except (KeyError, IndexError, TypeError):
                # subject variable not available in this file
                info = "nan"
            col = _np.array([[info for _x in range(data.shape[0])]])
            data = _np.c_[data, col.transpose()]

        # _add_exclusion trials
        if exclude_trials:
            for exl in self._exclusions:
                idx = self._find_idx(data, exl[0], exl[1], exl[2])
                if len(idx) > 0:
                    data = _np.delete(data, idx, axis=0)

        var = _copy(self._variables)
        var.extend(self._subject_variables)
        return [data, var, subject_info, comments]
Example #19
0
    def _find_idx(self, data, column_id, relation, value):
        """Find the indices of elements in a data column.

        Notes
        -----
        It compares of column elements with a value or the elements of a second
        column, if value is a name of variable.
        The method deals with numerical and string comparisons and throws an
        exception for invalid string comparisons.

        Parameters
        ----------
        data : numpy.array
            the data
        column_id : int
            id of column to compare
        relation : str
            relation as string.  possible relations:
            "==", "!=", ">", "<", ">=", "<=", "=>", "<="
        value : numeric or string
            value to find or a variable name

        """

        # is value a variable name
        second_var_id = self._get_variable_id(value, False)

        # _add_exclusion
        try:
            col = _np.float64(data[:, column_id])
        except:
            # handling strings
            col = data[:, column_id]
        try:
            if second_var_id is not None:
                val = _np.float64(data[:, second_var_id])
            else:
                val = _np.float64(value)
        except:
            # handling strings
            if second_var_id is not None:
                val = data[:, second_var_id]
            else:
                val = value

        if value.endswith("std") and (value.find("*") > 0):
            # remove relative depending std
            tmp = value.split("*")
            fac = float(tmp[0])

            mean_stds = self._dv_mean_std(data, column_id)
            idx = []
            if relation not in [">", "<", "=>", ">=", "=<", "<="]:
                raise RuntimeError("Incorrect syntax for " +
                                   "exception: '{0} {1}'".format(
                                       _unicode2str(relation),
                                       _unicode2str(value)))
            for cnt, row in enumerate(data):
                #find name of combination
                combi_str = self.variables[column_id]
                for iv in self._iv:
                    if isinstance(row[iv], unicode):
                        _row_data = _unicode2str(row[iv])
                    else:
                        _row_data = row[iv]
                    combi_str = combi_str + "_" + \
                        "{0}{1}".format(_unicode2str(self.variables[iv]),
                                        _row_data)
                deviation = float(row[column_id]) - mean_stds[combi_str][0]
                if (relation == ">" and
                    deviation > fac * mean_stds[combi_str][1]) or \
                   (relation == "=>" or relation == ">=" and
                    deviation >= fac * mean_stds[combi_str][1]) or \
                   (relation == "<" and
                    deviation < -fac * mean_stds[combi_str][1]) or \
                   (relation == "=<" or relation == "<=" and
                        deviation <= -fac * mean_stds[combi_str][1]):
                        idx.append(cnt)
            return idx
        else:
            if relation == "!=":
                comp = (col != val)
            elif relation == "==":
                comp = (col == val)
            elif relation == "<":
                comp = (col < val)
            elif relation == ">":
                comp = (col > val)
            elif relation == "=<" or relation == "<=":
                comp = (col <= val)
            elif relation == "=>" or relation == ">=":
                comp = (col >= val)
            else:
                comp = None  # should never occur
            if isinstance(comp, bool):
                raise RuntimeError(
                    "Incorrect syntax for " + "exception: '{0} {1}'".format(
                        _unicode2str(relation), _unicode2str(value)))
            return _np.flatnonzero(comp)
Example #20
0
    def _find_idx(self, data, column_id, relation, value):
        """Find the indices of elements in a data column.

        Notes
        -----
        It compares of column elements with a value or the elements of a second
        column, if value is a name of variable.
        The method deals with numerical and string comparisons and throws an
        exception for invalid string comparisons.

        Parameters
        ----------
        data : numpy.array
            the data
        column_id : int
            id of column to compare
        relation : str
            relation as string.  possible relations:
            "==", "!=", ">", "<", ">=", "<=", "=>", "<="
        value : numeric or string
            value to find or a variable name

        """

        # is value a variable name
        second_var_id = self._get_variable_id(value, False)

        # _add_exclusion
        try:
            col = _np.float64(data[:, column_id])
        except:
            # handling strings
            col = data[:, column_id]
        try:
            if second_var_id is not None:
                val = _np.float64(data[:, second_var_id])
            else:
                val = _np.float64(value)
        except:
            # handling strings
            if second_var_id is not None:
                val = data[:, second_var_id]
            else:
                val = value

        if value.endswith("std") and (value.find("*") > 0):
            # remove relative depending std
            tmp = value.split("*")
            fac = float(tmp[0])

            mean_stds = self._dv_mean_std(data, column_id)
            idx = []
            if relation not in [">", "<", "=>", ">=", "=<", "<="]:
                raise RuntimeError(
                    "Incorrect syntax for " + "exception: '{0} {1}'".format(
                        _unicode2str(relation), _unicode2str(value)))
            for cnt, row in enumerate(data):
                #find name of combination
                combi_str = self.variables[column_id]
                for iv in self._iv:
                    if isinstance(row[iv], unicode):
                        _row_data = _unicode2str(row[iv])
                    else:
                        _row_data = row[iv]
                    combi_str = combi_str + "_" + \
                        "{0}{1}".format(_unicode2str(self.variables[iv]),
                                        _row_data)
                deviation = float(row[column_id]) - mean_stds[combi_str][0]
                if (relation == ">" and
                    deviation > fac * mean_stds[combi_str][1]) or \
                   (relation == "=>" or relation == ">=" and
                    deviation >= fac * mean_stds[combi_str][1]) or \
                   (relation == "<" and
                    deviation < -fac * mean_stds[combi_str][1]) or \
                   (relation == "=<" or relation == "<=" and
                        deviation <= -fac * mean_stds[combi_str][1]):
                    idx.append(cnt)
            return idx
        else:
            if relation == "!=":
                comp = (col != val)
            elif relation == "==":
                comp = (col == val)
            elif relation == "<":
                comp = (col < val)
            elif relation == ">":
                comp = (col > val)
            elif relation == "=<" or relation == "<=":
                comp = (col <= val)
            elif relation == "=>" or relation == ">=":
                comp = (col >= val)
            else:
                comp = None  # should never occur
            if isinstance(comp, bool):
                raise RuntimeError(
                    "Incorrect syntax for " + "exception: '{0} {1}'".format(
                        _unicode2str(relation), _unicode2str(value)))
            return _np.flatnonzero(comp)
Example #21
0
    def reset(self,
              data_folder,
              file_name,
              suffix=_default_suffix,
              variables=None):
        """Reset the aggregator class and clear design.

        Every design setting (dependent/independent variables, exclusions,
        computes, recodings, subject variables and added data) is emptied
        and the data folder is scanned again. Each matching file is read
        once to check that all files share the same variables.

        Parameters
        ----------
        data_folder : str
            folder which contains the data of the subjects
        file_name : str
            name of the files. All files that start with this name
            will be considered for the analysis (cf. aggregator.data_files)
        suffix : str, optional
            if specified only files that end with this particular suffix
            will be considered (default=.xpd)
        variables : array of str, optional
            array of variable names, process only the specified variables

        Raises
        ------
        RuntimeError
            if a data file defines other variables than the first
            matching file
        Exception
            if no matching data file is found in the folder

        """

        self._data_folder = data_folder
        self._file_name = file_name
        self._data_files = []
        self._variables = []
        self._dv = []
        self._dv_txt = []
        self._iv = []
        self._iv_txt = []
        self._exclusions = []
        self._exclusions_txt = []
        self._computes = []
        self._computes_txt = []
        self._recode_txt = []
        self._recode = []
        self._subject_variables = []
        self._last_data = []
        self._added_data = []
        self._added_variables = []
        self._suffix = suffix

        for flname in _os.listdir(_os.path.dirname(self._data_folder + "/")):
            if flname.endswith(self._suffix) and \
                    flname.startswith(self._file_name):
                _data, vnames, _subject_info, _comments = \
                    read_datafile(self._data_folder + "/" + flname,
                                  read_variables=variables)

                if len(self._variables) < 1:
                    # the first matching file defines the reference
                    self._variables = vnames
                else:
                    if vnames != self._variables:
                        # "{0}" was missing, so the name of the offending
                        # file never made it into the message
                        message = u"Different variables in {0}".format(
                            flname)
                        message = message + u"\n{0}".format(vnames)
                        message = message + u"\ninstead of\n{0}".format(
                            self._variables)
                        raise RuntimeError(_unicode2str(message))
                self._data_files.append(flname)

        if len(self._data_files) < 1:
            raise Exception("No data files found in {0}".format(
                _unicode2str(self._data_folder)))

        print("found {0} subject_data sets".format(len(self._data_files)))
        print("found {0} variables: {1}".format(
            len(self._variables), [_unicode2str(x) for x in self._variables]))
Example #22
0
    def get_data(self,
                 filename,
                 recode_variables=True,
                 compute_new_variables=True,
                 exclude_trials=True):
        """Read data from a single Expyriment data file.

        Notes
        -----
        The function can be only applied on data of aggregator.data_files,
        that is, on the files in the defined data folder that start with
        the experiment name. According to the defined design, the result
        contains recoded data together with the new computed variables, and the
        subject variables from the headers of the Expyriment data files.

        Parameters
        ----------
        filename : str
            name of the Expyriment data file
        recode_variables : bool, optional
            set to False if defined variable recodings should not be applied
            (default=True)
        compute_new_variables : bool, optional
            set to False if new defined variables should not be computed
            (default=True)
        exclude_trials : bool, optional
            set to False if exclusion rules should not be applied
            (default=True)

        Returns
        -------
        data : numpy.array
        var_names : list
            list of variable names
        info : str
            subject info
        comment : str
            comments in data

        Raises
        ------
        RuntimeError
            if the file is not one of aggregator.data_files, if a computed
            variable uses non-numeric data or an unsupported operation

        """

        # check filename
        if filename not in self._data_files:
            raise RuntimeError("'{0}' is not in the data list\n".format(
                _unicode2str(filename)))

        data, _vnames, subject_info, comments = \
            read_datafile(self._data_folder + "/" + filename)
        print("   reading {0}".format(_unicode2str(filename)))

        if recode_variables:
            # rules are applied one after the other on the raw rows
            for var_id, recoding in self._recode:
                for old, new in recoding:
                    for row in data:
                        if row[var_id] == old:
                            row[var_id] = new

        data = _np.array(data, dtype='|S99')
        # compute new defined variables and append
        if compute_new_variables:
            for new_var_name, var_def in self._computes:
                if var_def[1] in self._relations:
                    # relations are true or false
                    col = _np.zeros([data.shape[0], 1], dtype=int)
                    idx = self._find_idx(data, var_def[0], var_def[1],
                                         var_def[2])
                    col[idx, 0] = 1
                else:
                    # operations
                    try:
                        a = _np.float64([data[:, var_def[0]]]).transpose()
                        second_var_id = self._get_variable_id(
                            var_def[2], False)
                        if second_var_id is not None:
                            b = _np.float64(
                                [data[:, second_var_id]]).transpose()
                        else:
                            b = _np.float64(var_def[2])
                    except (TypeError, ValueError):
                        # raise the formatted text; the result of format()
                        # was thrown away before, leaving the "{0}" and
                        # "{1}" placeholders in the message
                        msg = "Error while computing new variable {0}. " + \
                              "Non-number in variables of {1}"
                        raise RuntimeError(msg.format(
                            _unicode2str(new_var_name),
                            _unicode2str(filename)))
                    operation = var_def[1]
                    if operation == "+":
                        col = a + b
                    elif operation == "-":
                        col = a - b
                    elif operation == "*":
                        col = a * b
                    elif operation == "/":
                        col = a / b
                    elif operation == "%":
                        col = a % b
                    else:
                        # without this branch an undefined or stale column
                        # would be appended below
                        raise RuntimeError(
                            "Unknown operation '{0}' for new variable "
                            "{1}".format(_unicode2str(operation),
                                         _unicode2str(new_var_name)))
                data = _np.concatenate((data, col), axis=1)

        # add subject information
        for sv in self.subject_variables:
            try:
                info = subject_info[sv]
            except (KeyError, IndexError, TypeError):
                # subject variable not available in this file
                info = "nan"
            col = _np.array([[info for _x in range(data.shape[0])]])
            data = _np.c_[data, col.transpose()]

        # _add_exclusion trials
        if exclude_trials:
            for exl in self._exclusions:
                idx = self._find_idx(data, exl[0], exl[1], exl[2])
                if len(idx) > 0:
                    data = _np.delete(data, idx, axis=0)

        var = _copy(self._variables)
        var.extend(self._subject_variables)
        return [data, var, subject_info, comments]