Example #1
def adapt_passim_code():
    oErr = ErrHandle()
    bResult = True
    msg = ""
    
    try:
        # Walk all SSGs
        need_correction = {}
        for obj in EqualGold.objects.all().order_by("-id"):
            code = obj.code
            if code is not None and code != "ZZZ_DETERMINE":
                count = EqualGold.objects.filter(code=code).count()
                if count > 1:
                    oErr.Status("Duplicate code={} id={}".format(code, obj.id))
                    need_correction.setdefault(code, []).append(obj.id)
        oErr.Status(json.dumps(need_correction))
        for k,v in need_correction.items():
            code = k
            ssg_list = v
            for ssg_id in ssg_list[:-1]:
                oErr.Status("Changing CODE for id {}".format(ssg_id))
                obj = EqualGold.objects.filter(id=ssg_id).first()
                if obj is not None:
                    obj.code = None
                    obj.number = None
                    obj.save()
                    oErr.Status("Re-saved id {}, code is now: {}".format(obj.id, obj.code))

    except:
        bResult = False
        msg = oErr.get_error_message()
    return bResult, msg
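
The duplicate scan above walks every SSG in Python. The same duplicates can also be found in a single aggregation query; a minimal sketch, assuming the same EqualGold model (find_duplicate_codes is not part of the original code):

from django.db.models import Count

def find_duplicate_codes():
    # Group rows by code, count them, and keep only codes that occur more
    # than once; the placeholder value ZZZ_DETERMINE is excluded, as above.
    qs = (EqualGold.objects
          .exclude(code__isnull=True)
          .exclude(code="ZZZ_DETERMINE")
          .values("code")
          .annotate(num=Count("id"))
          .filter(num__gt=1))
    return {item["code"]: item["num"] for item in qs}
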
Example #2
    def get_galway(self, catalogue_id):
        """Read the manuscript with the indicated id from the Galway ELMSS API"""

        oBack = None
        bResult = False
        oErr = ErrHandle()
        try:
            url = "https://elmss.nuigalway.ie/api/v1/catalogue/{}".format(
                catalogue_id)
            try:
                r = requests.get(url)
            except:
                oErr.DoError("get_galway: request problem")
                # Return None, not a tuple, so the return type stays consistent
                return None
            if r.status_code == 200:
                # Read the response
                sText = r.text
                oBack = json.loads(sText)
                bResult = True
            else:
                bResult = False
                sResult = "download_file received status {} for {}".format(
                    r.status_code, url)
                oErr.Status("get_galway reading error: {}".format(sResult))
        except:
            msg = oErr.get_error_message()
            oErr.DoError("get_galway")
            oBack = None

        return oBack
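
get_galway yields either the decoded JSON object or None, so a caller only needs a None test. A hypothetical usage sketch (the owning class name and the catalogue id are invented):

reader = GalwayReader()          # hypothetical class that defines get_galway
oGalway = reader.get_galway(36)  # catalogue id chosen for illustration
if oGalway is None:
    print("Download failed; see the ErrHandle status log")
else:
    print("Fields received: {}".format(", ".join(oGalway.keys())))
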
Example #3
def adapt_manuprov_m2m():
    oErr = ErrHandle()
    bResult = True
    msg = ""
    
    try:
        # Issue #289: back to m2m connection
        prov_changes = 0
        prov_added = 0
        # Keep a list of provenance id's that may be kept
        keep_id = []

        # Remove all previous ProvenanceMan connections
        ProvenanceMan.objects.all().delete()
        # Get all the manuscripts
        for manu in Manuscript.objects.all():
            # Treat all the M2O provenances for this manuscript
            for prov in manu.manuprovenances.all():
                # Get the *name* and the *loc* for this prov
                name = prov.name
                loc = prov.location
                note = prov.note
                # Get the very *first* provenance with name/loc
                firstprov = Provenance.objects.filter(name__iexact=name, location=loc).first()
                if firstprov is None:
                    # Create one and count the addition
                    firstprov = Provenance.objects.create(name=name, location=loc)
                    prov_added += 1
                keep_id.append(firstprov.id)
                # Add the link and count the change
                ProvenanceMan.objects.create(manuscript=manu, provenance=firstprov, note=note)
                prov_changes += 1
        # Walk all provenances to remove the unused ones
        delete_id = []
        for prov in Provenance.objects.all().values('id'):
            if prov['id'] not in keep_id:
                delete_id.append(prov['id'])
        oErr.Status("Deleting provenances: {}".format(len(delete_id)))
        Provenance.objects.filter(id__in=delete_id).delete()

        # Success
        oErr.Status("adapt_manuprov_m2m: {} changes, {} additions".format(prov_changes, prov_added))
    except:
        bResult = False
        msg = oErr.get_error_message()
    return bResult, msg
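
The delete_id loop can be collapsed into one queryset call. A sketch under the same models; note that exclude() with a very large keep_id list can be slow on some database backends:

        # One-call equivalent of the delete_id loop above
        unused = Provenance.objects.exclude(id__in=keep_id)
        oErr.Status("Deleting provenances: {}".format(unused.count()))
        unused.delete()
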
Example #4
def adapt_latin_names():
    oErr = ErrHandle()
    bResult = True
    msg = ""    
    re_pattern = r'^\s*(?:\b[A-Z][a-zA-Z]+\b\s*)+(?=[_])'
    re_name = r'^[A-Z][a-zA-Z]+\s*'

    def add_names(main_list, fragment):
        oErr = ErrHandle()
        try:
            for sentence in fragment.replace("_", "").split("."):
                # Work through non-initial words; strip() ensures that after a
                # sentence break the first real word is indeed words[0]
                words = re.split(r'\s+', sentence.strip())
                for word in words[1:]:
                    if re.match(re_name, word):
                        if word not in main_list:
                            main_list.append(word)
        except:
            msg = oErr.get_error_message()
            oErr.DoError("add_names")

    try:
        # Start list of common names
        common_names = []
        # Walk all SSGs that are not templates
        with transaction.atomic():
            incexpl_list = EqualGold.objects.values('incipit', 'explicit')
            for incexpl in incexpl_list:
                # Treat incipit and explicit
                inc = incexpl['incipit']
                if inc is not None: add_names(common_names, inc)
                expl = incexpl['explicit']
                if expl is not None: add_names(common_names, expl)
            # Transform the word list
            names_list = sorted(common_names)
            oErr.Status("Latin common names: {}".format(json.dumps(names_list)))
    except:
        bResult = False
        msg = oErr.get_error_message()
    return bResult, msg
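
The harvesting rule is easiest to verify on a small input: any capitalized word that is not sentence-initial is treated as a name. A self-contained sketch of the same idea (the Latin sample is invented):

import re

re_name = r'^[A-Z][a-zA-Z]+\s*'

def harvest_names(fragment, main_list):
    for sentence in fragment.replace("_", "").split("."):
        words = re.split(r'\s+', sentence.strip())
        # words[0] is skipped: sentence-initial capitals are not evidence of a name
        for word in words[1:]:
            if re.match(re_name, word) and word not in main_list:
                main_list.append(word)

names = []
harvest_names("Sermo sancti Augustini episcopi. Dixit Iohannes ad Petrum.", names)
print(names)   # ['Augustini', 'Iohannes', 'Petrum']
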
Example #5
    def get_setlist(self, pivot_id=None):
        """Get the set of lists for this particular DCT"""

        oErr = ErrHandle()
        oBack = None
        try:
            # Get to the research set
            id_list = [
                x['id']
                for x in self.researchset.researchset_setlists.all().order_by(
                    'order').values("id")
            ]
            lst_rset = []
            if pivot_id is not None and pivot_id in id_list:
                lst_rset.append(pivot_id)
            # Add the remaining ID's
            for item_id in id_list:
                if item_id not in lst_rset: lst_rset.append(item_id)

            # Check the number of lists in the research set
            if len(lst_rset) < 2:
                oErr.Status("Not enough SSG-lists to compare")
                return None

            # We have enough lists: Get the lists of SSGs for each
            lst_ssglists = []
            for setlist_id in lst_rset:
                # Get the actual setlist
                setlist = SetList.objects.filter(id=setlist_id).first()
                if setlist is not None:
                    # Create an empty SSG-list
                    oSsgList = {}
                    # Add the object itself
                    oSsgList['obj'] = setlist
                    # Add the name object for this list
                    oSsgList['title'] = setlist.get_title_object()
                    # Get the list of SSGs for this list
                    oSsgList['ssglist'] = setlist.get_ssg_list()
                    # Add the list object to the list
                    lst_ssglists.append(oSsgList)

            # Return this list of lists
            oBack = dict(ssglists=lst_ssglists)
            # Prepare and create an appropriate table = list of rows
            rows = []
            # Create header row
            oRow = []
            oRow.append('Gr/Cl/Ot')
            for oSsgList in lst_ssglists:
                # Add the title *object*
                oRow.append(oSsgList['title'])
            rows.append(oRow)

            # Start out with the pivot: the *first* one in 'ssglist'
            lst_pivot = lst_ssglists[0]
            for oPivot in lst_pivot['ssglist']:
                # Create a row based on this pivot
                oRow = []
                # (1) row header
                oRow.append(oPivot['sig'])
                # (2) pivot SSG number
                oRow.append(oPivot['order'])
                # (3) SSG number in all other manuscripts
                ssg_id = oPivot['super']
                for lst_this in lst_ssglists[1:]:
                    order = ""
                    for oItem in lst_this['ssglist']:
                        if ssg_id == oItem['super']:
                            # Found it: take over its order number
                            order = oItem['order']
                            break
                    oRow.append(order)
                # (4) add the row to the list
                rows.append(oRow)
            # Make sure we return the right information
            oBack['setlist'] = rows
        except:
            msg = oErr.get_error_message()
            oErr.DoError("SetDef/get_setlist")
        return oBack
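
The table-building step aligns every non-pivot list against the pivot by the shared 'super' id, leaving a blank cell where an SSG does not occur. A stripped-down, self-contained sketch of that alignment (the data is invented):

pivot = [{"sig": "sig-A", "order": 1, "super": 11},
         {"sig": "sig-B", "order": 2, "super": 12}]
other = [{"order": 5, "super": 12}]   # only the second SSG occurs in this list

rows = []
for oPivot in pivot:
    order = ""
    for oItem in other:
        if oItem["super"] == oPivot["super"]:
            order = oItem["order"]
            break
    rows.append([oPivot["sig"], oPivot["order"], order])

print(rows)   # [['sig-A', 1, ''], ['sig-B', 2, 5]]
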
Example #6
def adapt_codicocopy(oStatus=None):
    """Create Codico's and copy Manuscript information to Codico"""
    oErr = ErrHandle()
    bResult = True
    msg = ""
    count_add = 0       # Codico layers added
    count_copy = 0      # Codico layers copied
    count_tem = 0       # Template codico changed
    oBack = dict(status="ok", msg="")

    try:
        # TODO: add code here and change to True
        bResult = False

        # Walk through all manuscripts and templates (mtype "man" or "tem")
        manu_lst = []
        for manu in Manuscript.objects.filter(mtype__iregex="man|tem"):
            # Check if this manuscript already has Codico's
            if manu.manuscriptcodicounits.count() == 0:
                # Note that Codico's must be made for this manuscript
                manu_lst.append(manu.id)
        # Status message
        oBack['total'] = "Manuscripts without codico: {}".format(len(manu_lst))
        if oStatus is not None: oStatus.set("ok", oBack)
        # Create the codico's for the manuscripts
        with transaction.atomic():
            for idx, manu_id in enumerate(manu_lst):
                # Debugging message
                msg = "Checking manuscript {} of {}".format(idx+1, len(manu_lst))
                oErr.Status(msg)

                # Status message
                oBack['total'] = msg
                if oStatus is not None: oStatus.set("ok", oBack)

                manu = Manuscript.objects.filter(id=manu_id).first()
                if manu is not None:
                    bResult, msg = add_codico_to_manuscript(manu)
                    if bResult:
                        count_add += 1
        oBack['codico_added'] = count_add

        # Checking up on manuscripts that are imported (stype='imp') but whose Codico has not been 'fixed' yet
        manu_lst = Manuscript.objects.filter(stype="imp").exclude(itype="codico_copied")
        # Status message
        oBack['total'] = "Imported manuscripts whose codico needs checking: {}".format(len(manu_lst))
        if oStatus is not None: oStatus.set("ok", oBack)
        with transaction.atomic():
            for idx, manu in enumerate(manu_lst):
                # Show what we are doing
                oErr.Status("Checking manuscript {} of {}".format(idx+1, len(manu_lst)))
                # Actually do it
                bResult, msg = add_codico_to_manuscript(manu)
                if bResult:
                    manu.itype = "codico_copied"
                    manu.save()
                    count_copy += 1
        oBack['codico_copied'] = count_copy

        # Adapt codico's for templates
        codico_name = "(No codicological definition for a template)" 
        with transaction.atomic():
            for codico in Codico.objects.filter(manuscript__mtype="tem"):
                # Make sure the essential parts are empty!!
                bNeedSaving = False
                if codico.name != codico_name : 
                    codico.name = codico_name
                    bNeedSaving = True
                # Reset the remaining fields to None where they still have a value
                for field in ("notes", "support", "extent", "format"):
                    if getattr(codico, field) is not None:
                        setattr(codico, field, None)
                        bNeedSaving = True
                if bNeedSaving:
                    codico.save()
                    count_tem += 1
        oBack['codico_template'] = count_tem

        if oStatus is not None: oStatus.set("finished", oBack)

        # Note that we are indeed ready
        bResult = True
    except:
        msg = oErr.get_error_message()
        bResult = False
    return bResult, msg
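
The optional oStatus argument only needs a set(status, oBack) method. A hypothetical minimal stand-in, useful when running the adaptation outside the web front-end:

class SimpleStatus:
    """Hypothetical stand-in for the status object used by the front-end"""
    def set(self, status, oBack=None):
        msg = "" if oBack is None else oBack.get("total", "")
        print("[{}] {}".format(status, msg))

bResult, msg = adapt_codicocopy(oStatus=SimpleStatus())
print("ok" if bResult else "failed: {}".format(msg))
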
Example #7
    def do_hier_method3(self, ssg_corpus, names_list):
        """Calculate distance matrix with PYSTYL, do the rest myself"""
        oErr = ErrHandle()
        node_list = []
        link_list = []
        max_value = 0

        def dm_to_leo(matrix, crp):
            links = []
            maxv = 0
            for row_id, row in enumerate(matrix):
                # Calculate the link
                minimum = None
                min_id = -1
                # Scan the columns to the right of the diagonal
                for col_idx in range(row_id + 1, len(row)):
                    value = row[col_idx]
                    # Track the smallest positive distance and its column
                    if value > 0 and (minimum is None or value < minimum):
                        minimum = value
                        min_id = col_idx
                if minimum is not None and min_id >= 0:
                    # Create a link
                    oLink = dict(source_id=row_id,
                                 source=crp.titles[row_id],
                                 target_id=min_id,
                                 target=crp.titles[min_id],
                                 value=minimum)
                    links.append(oLink)
                    # Keep track of max_value
                    if minimum > maxv:
                        maxv = minimum

            return links, maxv

        def get_nodes(crp):
            nodes = []
            for idx, item in enumerate(crp.target_ints):
                oNode = dict(group=item,
                             author=crp.target_idx[item],
                             id=crp.titles[idx],
                             scount=5)
                nodes.append(oNode)
            return nodes

        do_example = False
        try:
            # Create a pystyl-corpus object (see above)
            sty_corpus = Corpus(texts=[],
                                titles=[],
                                target_ints=[],
                                target_idx=[])

            ssg_dict = {}

            # Walk the ssg_corpus: each SSG is one 'text', having a title and a category (=author code)
            for idx, item in enumerate(
                    EqualGoldCorpusItem.objects.filter(
                        corpus=ssg_corpus).values('words', 'authorname',
                                                  'equal__code', 'equal__id')):
                # Determine the name for this row
                category = item['authorname']
                code = item['equal__code']
                if code is None or code == "" or " " not in code or "." not in code:
                    title = "eqg{}".format(item['equal__id'])
                else:
                    title = code.split(" ")[1]
                # The text = the words
                text = " ".join(json.loads(item['words']))

                # Add the text to the corpus
                if title in sty_corpus.titles:
                    ssg_id = -1
                    bFound = False
                    for k, v in ssg_dict.items():
                        if v == title:
                            ssg_id = k
                            bFound = True
                            break
                    oErr.Status(
                        "EqualGoldGraph/do_hier_method3: attempt to add same title '{}' for {} and {}"
                        .format(title, ssg_id, item['equal__id']))
                else:
                    # Also make sure to build an SSG-dictionary
                    ssg_dict[item['equal__id']] = title

                    sty_corpus.add_text(text, title, category)

            # We now 'have' the corpus, so we can work with it...
            sty_corpus.preprocess(alpha_only=True, lowercase=True)
            sty_corpus.tokenize()

            # Remove the common names
            sty_corpus.remove_tokens(rm_tokens=names_list, rm_pronouns=False)

            # Vectorize the corpus
            sty_corpus.vectorize(mfi=200,
                                 ngram_type="word",
                                 ngram_size=1,
                                 vector_space="tf_std")

            # Get a list of nodes
            node_list = get_nodes(sty_corpus)

            # Create a distance matrix
            dm = distance_matrix(sty_corpus, "minmax")

            # Convert the distance matrix into a list of 'nearest links'
            link_list, max_value = dm_to_leo(dm, sty_corpus)

            # The node_list and link_list are now complete

        except:
            msg = oErr.get_error_message()
            oErr.DoError("do_hier_method1")

        return node_list, link_list, max_value
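
The nearest-link conversion itself is independent of PyStyl and can be checked on a hand-made matrix. A self-contained sketch of the same logic as dm_to_leo above (titles and distances are invented):

def nearest_links(matrix, titles):
    links, maxv = [], 0
    for row_id, row in enumerate(matrix):
        minimum, min_id = None, -1
        # Scan the columns to the right of the diagonal
        for col_idx in range(row_id + 1, len(row)):
            value = row[col_idx]
            if value > 0 and (minimum is None or value < minimum):
                minimum, min_id = value, col_idx
        if minimum is not None and min_id >= 0:
            links.append(dict(source=titles[row_id], target=titles[min_id], value=minimum))
            maxv = max(maxv, minimum)
    return links, maxv

dm = [[0.0, 0.4, 0.1],
      [0.4, 0.0, 0.7],
      [0.1, 0.7, 0.0]]
print(nearest_links(dm, ["eqg1", "eqg2", "eqg3"]))
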
Example #8
    def do_manu_method(self, ssg_corpus, manu_list, min_value):
        """Calculation like Manuscript_Transmission_Of_se172
        
        The @min_value is the minimum link-value the user wants to see
        """
        oErr = ErrHandle()
        node_list = []
        link_list = []
        max_value = 0  # Maximum number of manuscripts in which an SSG occurs
        max_scount = 1  # Maximum number of sermons associated with one SSG
        manu_count = []

        def get_nodes(crp):
            nodes = []
            for idx, item in enumerate(crp.target_ints):
                oNode = dict(group=item,
                             author=crp.target_idx[item],
                             id=crp.titles[idx])
                nodes.append(oNode)
            return nodes

        try:
            # Create a pystyl-corpus object (see above)
            sty_corpus = Corpus(texts=[],
                                titles=[],
                                target_ints=[],
                                target_idx=[])

            ssg_dict = {}
            link_dict = {}
            scount_dict = {}
            node_listT = []
            link_listT = []

            # Initialize manu_count: the number of SSG co-occurring in N manuscripts
            for item in manu_list:
                manu_count.append(0)

            # Walk the ssg_corpus: each SSG is one 'text', having a title and a category (=author code)
            for idx, item in enumerate(
                    EqualGoldCorpusItem.objects.filter(
                        corpus=ssg_corpus).values('words', 'authorname',
                                                  'scount', 'equal__code',
                                                  'equal__id')):
                # Determine the name for this row
                category = item['authorname']
                code = item['equal__code']
                if code is None or code == "" or " " not in code or "." not in code:
                    title = "eqg{}".format(item['equal__id'])
                else:
                    title = code.split(" ")[1]
                # The text = the words
                text = " ".join(json.loads(item['words']))

                # Get the scount and store it in a dictionary
                scount_dict[title] = item['scount']
                if item['scount'] > max_scount:
                    max_scount = item['scount']

                # Add the text to the corpus
                if title in sty_corpus.titles:
                    ssg_id = -1
                    bFound = False
                    for k, v in ssg_dict.items():
                        if v == title:
                            ssg_id = k
                            bFound = True
                            break
                    oErr.Status(
                        "EqualGoldGraph/do_manu_method: attempt to add same title '{}' for {} and {}"
                        .format(title, ssg_id, item['equal__id']))
                else:
                    # Also make sure to build an SSG-dictionary
                    ssg_dict[item['equal__id']] = title

                    sty_corpus.add_text(text, title, category)

            # Get a list of nodes
            node_listT = get_nodes(sty_corpus)

            # Walk the manuscripts
            for manu_item in manu_list:
                manu_id = manu_item["manu_id"]
                # Get a list of all SSGs in this manuscript
                ssg_list = SermonDescrEqual.objects.filter(
                    manu__id=manu_id).order_by('super_id').distinct().values(
                        'super_id')
                ssg_list_id = [x['super_id'] for x in ssg_list]
                # evaluate links between a source and target SSG
                for idx_s, source_id in enumerate(ssg_list_id):
                    # sanity check
                    if source_id in ssg_dict:
                        # Get the title of the source
                        source = ssg_dict[source_id]
                        for idx_t in range(idx_s + 1, len(ssg_list_id)):
                            target_id = ssg_list_id[idx_t]
                            # Double check
                            if target_id in ssg_dict:
                                # Get the title of the target
                                target = ssg_dict[target_id]
                                # Retrieve or create a link from the link_listT
                                link_code = "{}_{}".format(
                                    source_id, target_id)
                                if link_code in link_dict:
                                    oLink = link_listT[link_dict[link_code]]
                                else:
                                    oLink = dict(source=source,
                                                 source_id=source_id,
                                                 target=target,
                                                 target_id=target_id,
                                                 value=0)
                                    link_listT.append(oLink)
                                    link_dict[link_code] = len(link_listT) - 1
                                # Now add to the value
                                oLink['value'] += 1
                                if oLink['value'] > max_value:
                                    max_value = oLink['value']

            # Only accept the links that have a value >= min_value
            node_names = []
            for oItem in link_listT:
                if oItem['value'] >= min_value:
                    link_list.append(copy.copy(oItem))
                    # Take note of the nodes
                    src = oItem['source']
                    dst = oItem['target']
                    if src not in node_names: node_names.append(src)
                    if dst not in node_names: node_names.append(dst)
            # Walk the nodes
            for oItem in node_listT:
                if oItem['id'] in node_names:
                    oItem['scount'] = 100 * scount_dict[oItem['id']] / max_scount
                    node_list.append(copy.copy(oItem))

        except:
            msg = oErr.get_error_message()
            oErr.DoError("do_hier_method1")

        return node_list, link_list, max_value
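
At its core the manuscript walk is a pair counter over the SSGs of each manuscript. A self-contained sketch with invented manuscript contents:

manuscripts = {
    "manu_A": ["eqg1", "eqg2", "eqg3"],
    "manu_B": ["eqg1", "eqg2"],
}

link_dict = {}
for manu_id, titles in manuscripts.items():
    # Count every source/target pair within one manuscript once
    for i, source in enumerate(titles):
        for target in titles[i + 1:]:
            key = (source, target)
            link_dict[key] = link_dict.get(key, 0) + 1

print(link_dict)   # {('eqg1', 'eqg2'): 2, ('eqg1', 'eqg3'): 1, ('eqg2', 'eqg3'): 1}
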
Example #9
    def do_manu_method(self, ssg_corpus, manu_list, min_value):
        """Calculation like 'MedievalManuscriptTransmission' description
        
        The @min_value is the minimum link-value the user wants to see
        """

        oErr = ErrHandle()
        author_dict = {}
        node_list = []
        link_list = []
        author_list = []  # initialized here so the final return works on the error path too
        max_value = 0  # Maximum number of manuscripts in which an SSG occurs
        max_scount = 1  # Maximum number of sermons associated with one SSG

        try:
            # Initializations
            ssg_dict = {}
            link_dict = {}
            scount_dict = {}
            node_listT = []
            link_listT = []
            node_set = {}  # Put the nodes in a set, with their SSG_ID as key
            title_set = {}  # Link from title to SSG_ID

            # Walk the ssg_corpus: each SSG is one 'text', having a title and a category (=author code)
            for idx, item in enumerate(
                    EqualGoldCorpusItem.objects.filter(
                        corpus=ssg_corpus).values('authorname', 'scount',
                                                  'equal__code', 'equal__id')):
                # Determine the name for this row
                category = item['authorname']
                ssg_id = item['equal__id']
                code = item['equal__code']
                if code is None or code == "" or " " not in code or "." not in code:
                    title = "eqg{}".format(ssg_id)
                else:
                    title = code.split(" ")[1]
                # Get the Signature that is most appropriate
                sig = get_ssg_sig(ssg_id)

                # Add author to dictionary
                if category not in author_dict: author_dict[category] = 0
                author_dict[category] += 1

                # Get the scount and store it in a dictionary
                scount = item['scount']
                scount_dict[ssg_id] = scount
                if scount > max_scount:
                    max_scount = scount

                node_key = ssg_id
                node_value = dict(label=title,
                                  category=category,
                                  scount=scount,
                                  sig=sig,
                                  rating=0)
                if node_key in node_set:
                    oErr.Status(
                        "EqualGoldGraph/do_manu_method: attempt to add same title '{}' for {} and {}"
                        .format(title, ssg_id, title_set[title]))
                else:
                    node_set[node_key] = node_value
                    title_set[title] = ssg_id

            # Create list of authors
            author_list = [
                dict(category=k, count=v) for k, v in author_dict.items()
            ]
            author_list = sorted(author_list,
                                 key=lambda x:
                                 (-1 * x['count'], x['category'].lower()))

            # Create a dictionary of manuscripts, each having a list of SSG ids
            manu_set = {}
            for manu_item in manu_list:
                manu_id = manu_item["manu_id"]
                # Get a list of all SSGs in this manuscript
                ssg_list = SermonDescrEqual.objects.filter(
                    manu__id=manu_id).order_by('super_id').distinct()
                # Add the SSG id list to the manuset
                manu_set[manu_id] = [x.super for x in ssg_list]

            # Create a list of edges based on the above
            link_dict = {}
            for manu_id, ssg_list in manu_set.items():
                # Only treat ssg_lists that are larger than 1
                if len(ssg_list) > 1:
                    # itertool.combinations creates all combinations of SSG to SSG in one manuscript
                    for subset in itertools.combinations(ssg_list, 2):
                        source = subset[0]
                        target = subset[1]
                        source_id = source.id
                        target_id = target.id
                        link_code = "{}_{}".format(source_id, target_id)
                        if link_code in link_dict:
                            oLink = link_dict[link_code]
                        else:
                            oLink = dict(source=source_id,
                                         target=target_id,
                                         value=0)
                            link_dict[link_code] = oLink
                        # Add 1
                        oLink['value'] += 1
                        if oLink['value'] > max_value:
                            max_value = oLink['value']
            # Turn the link_dict into a list
            # link_list = [v for k,v in link_dict.items()]

            # Only accept the links that have a value >= min_value
            node_dict = {}
            link_list = []
            for k, oItem in link_dict.items():
                if oItem['value'] >= min_value:
                    link_list.append(copy.copy(oItem))
                    # Take note of the nodes
                    src = oItem['source']
                    dst = oItem['target']
                    if src not in node_dict: node_dict[src] = node_set[src]
                    if dst not in node_dict: node_dict[dst] = node_set[dst]
            # Walk the nodes
            node_list = []
            for ssg_id, oItem in node_dict.items():
                oItem['id'] = ssg_id
                oItem['scount'] = 100 * scount_dict[oItem['id']] / max_scount
                node_list.append(copy.copy(oItem))

        except:
            msg = oErr.get_error_message()
            oErr.DoError("do_manu_method")

        return node_list, link_list, author_list, max_value
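
itertools.combinations does the pairing work directly, which is what distinguishes this variant from the previous one. A self-contained sketch of the combination counting plus the min_value filter (data invented):

import itertools

manu_set = {1: [11, 12, 13], 2: [11, 12]}   # manuscript id -> SSG ids
min_value = 2

link_dict = {}
for manu_id, ssg_list in manu_set.items():
    if len(ssg_list) > 1:
        for source_id, target_id in itertools.combinations(ssg_list, 2):
            oLink = link_dict.setdefault(
                (source_id, target_id),
                dict(source=source_id, target=target_id, value=0))
            oLink['value'] += 1

link_list = [o for o in link_dict.values() if o['value'] >= min_value]
print(link_list)   # [{'source': 11, 'target': 12, 'value': 2}]
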
Example #10
    def process_files(self, request, source, lResults, lHeader):
        file_list = []
        oErr = ErrHandle()
        bOkay = True
        code = ""
        oStatus = self.oStatus
        try:
            # Make sure we have the username
            username = self.username
            profile = Profile.get_user_profile(username)
            team_group = app_editor
            kwargs = {
                'profile': profile,
                'username': username,
                'team_group': team_group
            }

            # Get the contents of the imported file
            files = request.FILES.getlist('files_field')
            if files is not None:
                for data_file in files:
                    filename = data_file.name
                    file_list.append(filename)

                    # Set the status
                    oStatus.set("reading", msg="file={}".format(filename))

                    # Get the source file
                    if data_file is None or data_file == "":
                        self.arErr.append(
                            "No source file specified for the selected project"
                        )
                    else:
                        # Check the extension
                        extension = filename.split(".")[-1]

                        lst_manual = []
                        lst_read = []

                        # Further processing depends on the extension
                        oResult = {
                            'status': 'ok',
                            'count': 0,
                            'sermons': 0,
                            'msg': "",
                            'user': username
                        }

                        if extension == "xlsx":
                            # This is an Excel file: read the file using openpyxl
                            # Write data temporarily to the WRITABLE dir, but with a temporary filename
                            tmp_path = os.path.abspath(
                                os.path.join(MEDIA_DIR, filename))
                            with io.open(tmp_path, "wb") as f:
                                sData = data_file.read()
                                f.write(sData)

                            # Read string file
                            wb = openpyxl.load_workbook(tmp_path,
                                                        read_only=True)
                            sheetnames = wb.sheetnames
                            ws_manu = None
                            ws_sermo = None
                            for sname in sheetnames:
                                if "manu" in sname.lower():
                                    ws_manu = wb[sname]
                                elif "sermo" in sname.lower():
                                    ws_sermo = wb[sname]
                            # Do we have a manuscript worksheet?
                            if ws_manu is not None:
                                # Process the manuscript-proper details: columns Name and Value
                                oManu = {}
                                row_num = 1
                                v1 = ws_manu.cell(row=row_num, column=1).value
                                v2 = ws_manu.cell(row=row_num, column=2).value
                                # Guard against empty header cells before calling .lower()
                                if v1 is not None and v1.lower() == "field" and \
                                   v2 is not None and v2.lower() == "value":
                                    # we can skip the first row
                                    row_num += 1
                                bStop = False
                                while not bStop:
                                    k = ws_manu.cell(row=row_num,
                                                     column=1).value
                                    v = ws_manu.cell(row=row_num,
                                                     column=2).value
                                    if k == "" or k == None:
                                        bStop = True
                                    else:
                                        row_num += 1
                                        k = k.lower()
                                        oManu[k] = v
                                # We have an object with key/value pairs: process it
                                manu = Manuscript.custom_add(oManu, **kwargs)

                                # Now get the codicological unit that has been automatically created and adapt it
                                codico = manu.manuscriptcodicounits.first()
                                if codico is not None:
                                    oManu['manuscript'] = manu
                                    codico = Codico.custom_add(oManu, **kwargs)

                                oResult['count'] += 1
                                oResult['obj'] = manu
                                oResult['name'] = manu.idno

                                # Check if there is a "Sermon" worksheet
                                if ws_sermo is not None:
                                    # Get the column names
                                    row_num = 1
                                    column = 1
                                    header = []
                                    v = ws_sermo.cell(row=row_num,
                                                      column=column).value
                                    while v is not None and v != "" and v != "-":
                                        header.append(v.lower())
                                        column += 1
                                        v = ws_sermo.cell(row=row_num,
                                                          column=column).value
                                    # Process the sermons in this sheet
                                    sermon_list = []
                                    column = 1
                                    row_num += 1
                                    v = ws_sermo.cell(row=row_num,
                                                      column=column).value
                                    while v != "" and v != None:
                                        # ==== DEBUG ====
                                        oErr.Status(
                                            "Upload excel row_num={}".format(
                                                row_num))
                                        # ===============

                                        # Create a new sermon object
                                        oSermon = {}
                                        # Process this row
                                        for idx, col_name in enumerate(header):
                                            column = idx + 1
                                            oSermon[col_name] = ws_sermo.cell(
                                                row=row_num,
                                                column=column).value
                                        # Process this sermon
                                        order = oSermon['order']
                                        sermon = SermonDescr.custom_add(
                                            oSermon, manu, order)

                                        oResult['sermons'] += 1

                                        # Get parent, firstchild, next
                                        parent = oSermon['parent']
                                        firstchild = oSermon['firstchild']
                                        nextone = oSermon['next']
                                        # Add to list
                                        sermon_list.append({
                                            'order': order,
                                            'parent': parent,
                                            'firstchild': firstchild,
                                            'next': nextone,
                                            'sermon': sermon
                                        })
                                        # GO to the next row for the next sermon
                                        row_num += 1
                                        column = 1
                                        v = ws_sermo.cell(row=row_num,
                                                          column=column).value

                                    # Now process the parent/firstchild/next items
                                    with transaction.atomic():
                                        for oSermo in sermon_list:
                                            # Get the p/f/n numbers
                                            parent_id = oSermo['parent']
                                            firstchild_id = oSermo[
                                                'firstchild']
                                            next_id = oSermo['next']
                                            # Process parent
                                            if parent_id != '' and parent_id is not None:
                                                parent = next(
                                                    (obj['sermon'] for obj in sermon_list
                                                     if obj['order'] == parent_id), None)
                                                # Guard: the referenced row may be missing
                                                if parent is not None:
                                                    oSermo['sermon'].msitem.parent = parent.msitem
                                                    oSermo['sermon'].msitem.save()
                                            # Process firstchild
                                            if firstchild_id != '' and firstchild_id is not None:
                                                firstchild = next(
                                                    (obj['sermon'] for obj in sermon_list
                                                     if obj['order'] == firstchild_id), None)
                                                if firstchild is not None:
                                                    oSermo['sermon'].msitem.firstchild = firstchild.msitem
                                                    oSermo['sermon'].msitem.save()
                                            # Process next
                                            if next_id != '' and next_id is not None:
                                                nextone = next(
                                                    (obj['sermon'] for obj in sermon_list
                                                     if obj['order'] == next_id), None)
                                                if nextone is not None:
                                                    oSermo['sermon'].msitem.next = nextone.msitem
                                                    oSermo['sermon'].msitem.save()

                        # Create a report and add it to what we return
                        oContents = {
                            'headers': lHeader,
                            'list': lst_manual,
                            'read': lst_read
                        }
                        oReport = Report.make(username, "ixlsx",
                                              json.dumps(oContents))

                        # Determine a status code
                        statuscode = "error" if oResult == None or oResult[
                            'status'] == "error" else "completed"
                        if oResult == None:
                            self.arErr.append(
                                "There was an error. No manuscripts have been added"
                            )
                        else:
                            lResults.append(oResult)
            code = "Imported using the [import_excel] function on this filew: {}".format(
                ", ".join(file_list))
        except:
            bOkay = False
            code = oErr.get_error_message()
        return bOkay, code
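
The reusable part of this importer is the two-column Field/Value loop. A minimal sketch of the same openpyxl pattern as a standalone helper (the file path and sheet name are illustrative):

import openpyxl

def read_key_value_sheet(path, sheet_name):
    """Read a two-column Field/Value worksheet into a dict"""
    wb = openpyxl.load_workbook(path, read_only=True)
    ws = wb[sheet_name]
    oManu, row_num = {}, 1
    # Skip an optional "field"/"value" header row
    if str(ws.cell(row=row_num, column=1).value or "").lower() == "field":
        row_num += 1
    while True:
        k = ws.cell(row=row_num, column=1).value
        if k is None or k == "":
            break
        oManu[k.lower()] = ws.cell(row=row_num, column=2).value
        row_num += 1
    return oManu

# oManu = read_key_value_sheet("import.xlsx", "Manuscript")
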
Example #11
    def process_files(self, request, source, lResults, lHeader):
        file_list = []
        oErr = ErrHandle()
        bOkay = True
        code = ""
        oStatus = self.oStatus
        # The list of headers to be shown
        lHeader = [
            'status', 'msg', 'name', 'daterange', 'library', 'idno', 'url'
        ]

        def add_manu(lst_manual,
                     lst_read,
                     status="",
                     msg="",
                     user="",
                     name="",
                     url="",
                     daterange="",
                     library="",
                     filename="",
                     sermons="",
                     idno=""):
            oInfo = {}
            oInfo['status'] = status
            oInfo['msg'] = msg
            oInfo['user'] = user
            oInfo['name'] = name
            oInfo['url'] = url
            oInfo['daterange'] = daterange
            oInfo['library'] = library
            oInfo['idno'] = idno
            oInfo['filename'] = filename
            oInfo['sermons'] = sermons
            if status == "error":
                lst_manual.append(oInfo)
            else:
                lst_read.append(oInfo)
            return True

        try:
            # Make sure we have the username
            username = self.username
            profile = Profile.get_user_profile(username)
            team_group = app_editor
            kwargs = {
                'profile': profile,
                'username': username,
                'team_group': team_group,
                'source': source
            }

            # Get the contents of the imported file
            files = request.FILES.getlist('files_field')
            if files is not None:
                for data_file in files:
                    filename = data_file.name
                    file_list.append(filename)

                    # Set the status
                    oStatus.set("reading", msg="file={}".format(filename))

                    # Get the source file
                    if data_file is None or data_file == "":
                        self.arErr.append(
                            "No source file specified for the selected project"
                        )
                    else:
                        # Check the extension
                        extension = filename.split(".")[-1]

                        lst_manual = []
                        lst_read = []

                        # Further processing depends on the extension
                        oResult = {
                            'status': 'ok',
                            'count': 0,
                            'sermons': 0,
                            'msg': "",
                            'user': username,
                            'filename': filename
                        }

                        if extension == "csv":
                            # This is a CSV file. We expect the catalogue id's to be in the leftmost column

                            # Write data temporarily to the WRITABLE dir, but with a temporary filename
                            tmp_path = os.path.abspath(
                                os.path.join(MEDIA_DIR, filename))
                            with io.open(tmp_path, "wb") as f:
                                sData = data_file.read()
                                f.write(sData)

                            # Read the CSV file with a reader
                            with open(tmp_path, "r", encoding="utf-8") as f:
                                reader = csv.reader(f,
                                                    delimiter=",",
                                                    dialect='excel')
                                # Read the header cells and make a header row in the worksheet
                                headers = next(reader)
                                row_num = 1
                                column = 1
                                lCsv = []
                                for row in reader:
                                    # Keep track of the EXCEL row we are in
                                    row_num += 1
                                    # Get the ID
                                    cell_value = row[0]
                                    if cell_value is not None and cell_value != "":
                                        # Get the catalogue id
                                        catalogue_id = int(cell_value)

                                        # Set the status
                                        oStatus.set(
                                            "reading",
                                            msg="catalogue id={}".format(
                                                catalogue_id))

                                        # Clear the galway and codico objects
                                        oGalway = None
                                        oCodico = None

                                        # Read the manuscript object from the Galway site
                                        oGalway = self.get_galway(catalogue_id)
                                        # Extract Manuscript information and Codico information from [oGalway]
                                        oManu, oCodico = self.get_manucodico(
                                            oGalway)

                                        if oManu is not None and oCodico is not None:
                                            libname = "{}, {}, {}".format(
                                                oManu['country_name'],
                                                oManu['city_name'],
                                                oManu['library_name'])

                                            # Add manuscript (if not yet there)
                                            manu = Manuscript.custom_add(
                                                oManu, **kwargs)

                                            if manu.library is None:
                                                # Log that the library is not recognized
                                                oErr.Status(
                                                    "Library not recognized: {}"
                                                    .format(libname))
                                                # Also add this in the notes
                                                notes = "" if manu.notes == None else manu.notes
                                                manu.notes = "Library not found: {}  \n{}".format(
                                                    libname, notes)

                                            # Make sure to add the source and the RAW data
                                            manu.source = source
                                            manu.raw = json.dumps(oGalway,
                                                                  indent=2)
                                            manu.save()

                                            # Now get the codicological unit that has been automatically created and adapt it
                                            codico = manu.manuscriptcodicounits.first()
                                            if codico is not None:
                                                oCodico['manuscript'] = manu
                                                codico = Codico.custom_add(
                                                    oCodico, **kwargs)

                                            # Process results
                                            add_manu(lst_manual,
                                                     lst_read,
                                                     status=oResult['status'],
                                                     user=oResult['user'],
                                                     name=codico.name,
                                                     daterange=oCodico[
                                                         'date ranges'],
                                                     library=libname,
                                                     filename=manu.idno,
                                                     sermons=0,
                                                     idno=manu.idno)

                                            oResult['count'] += 1
                                            #oResult['obj'] = manu
                                            #oResult['name'] = manu.idno

                                            oResultManu = dict(
                                                name=manu.idno,
                                                filename=oManu['url'],
                                                sermons=0)
                                            lResults.append(oResultManu)

                        # Create a report and add it to what we return
                        oContents = {
                            'headers': lHeader,
                            'list': lst_manual,
                            'read': lst_read
                        }
                        oReport = Report.make(username, "xlsx",
                                              json.dumps(oContents))

                        # Determine a status code
                        statuscode = "error" if oResult == None or oResult[
                            'status'] == "error" else "completed"
                        if oResult == None:
                            self.arErr.append(
                                "There was an error. No manuscripts have been added"
                            )
                        else:
                            lResults.append(oResult)

            # Make sure we have a success message available
            code = "Imported using the [import_galway] function on this file: {}".format(
                ", ".join(file_list))

            # Indicate we are ready
            oStatus.set("ready")
        except:
            bOkay = False
            code = oErr.get_error_message()
        return bOkay, code
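
The CSV side reduces to collecting the catalogue ids from the leftmost column. A self-contained sketch of just that step (the file name is illustrative):

import csv

def read_catalogue_ids(path):
    """Collect integer ids from the leftmost CSV column, skipping the header"""
    ids = []
    with open(path, "r", encoding="utf-8") as f:
        reader = csv.reader(f, delimiter=",", dialect="excel")
        next(reader)   # skip the header row
        for row in reader:
            if row and row[0] not in (None, ""):
                ids.append(int(row[0]))
    return ids

# Each id can then be fed to get_galway(), as in process_files() above
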
Example #12
def enrich_experiment():
    """RUn a complete experiment"""

    sBack = "Experiment has started"
    oErr = ErrHandle()

    # Define the randomization method; each assignment overrides the previous
    # one, so the last value ("square") is the method actually in effect
    method = "pergroup"     # Method doesn't work correctly
    method = "quadro"       # Production method for the situation of 78 speakers
    method = "stochastic"   # Method should work for randomly chosen stuff
    method = "gendered"     # Gendered variant of quadro
    method = "alternative"  # Yet another alternative method
    method = "square"       # Simpler method for squares: spkr == sent

    # Other initialisations
    cnt_speakers = 48   # Number of speakers - used to be 78
    cnt_sentences = 48  # Number of sentences recorded per speaker
    cnt_conditions = 2  # Number of ntype conditions
    cnt_round = 4       # Number of rounds of testsets to be created

    # Make sure the speakers have the right id and gender
    SPEAKER_INFO = [
        {"SpeakerNum":1,"Gender":"f"},{"SpeakerNum":4,"Gender":"m"},{"SpeakerNum":8,"Gender":"f"},
        {"SpeakerNum":9,"Gender":"f"},{"SpeakerNum":11,"Gender":"f"},{"SpeakerNum":12,"Gender":"f"},
        {"SpeakerNum":13,"Gender":"f"},{"SpeakerNum":14,"Gender":"f"},{"SpeakerNum":15,"Gender":"m"},
        {"SpeakerNum":16,"Gender":"m"},{"SpeakerNum":17,"Gender":"f"},{"SpeakerNum":18,"Gender":"f"},
        {"SpeakerNum":19,"Gender":"f"},{"SpeakerNum":21,"Gender":"f"},{"SpeakerNum":22,"Gender":"m"},
        {"SpeakerNum":23,"Gender":"f"},{"SpeakerNum":25,"Gender":"f"},{"SpeakerNum":27,"Gender":"f"},
        {"SpeakerNum":30,"Gender":"f"},{"SpeakerNum":31,"Gender":"f"},{"SpeakerNum":32,"Gender":"f"},
        {"SpeakerNum":33,"Gender":"f"},{"SpeakerNum":34,"Gender":"f"},{"SpeakerNum":35,"Gender":"m"},
        {"SpeakerNum":36,"Gender":"f"},{"SpeakerNum":37,"Gender":"m"},{"SpeakerNum":38,"Gender":"f"},
        {"SpeakerNum":39,"Gender":"f"},{"SpeakerNum":40,"Gender":"m"},{"SpeakerNum":42,"Gender":"f"},
        {"SpeakerNum":43,"Gender":"f"},{"SpeakerNum":44,"Gender":"m"},{"SpeakerNum":46,"Gender":"m"},
        {"SpeakerNum":47,"Gender":"f"},{"SpeakerNum":48,"Gender":"f"},{"SpeakerNum":49,"Gender":"f"},
        {"SpeakerNum":51,"Gender":"f"},{"SpeakerNum":52,"Gender":"m"},{"SpeakerNum":53,"Gender":"f"},
        {"SpeakerNum":54,"Gender":"f"},{"SpeakerNum":55,"Gender":"f"},{"SpeakerNum":57,"Gender":"f"},
        {"SpeakerNum":61,"Gender":"m"},{"SpeakerNum":64,"Gender":"m"},{"SpeakerNum":65,"Gender":"m"},
        {"SpeakerNum":69,"Gender":"m"},{"SpeakerNum":70,"Gender":"m"},{"SpeakerNum":78,"Gender":"m"}]

    SENTENCE_NAMES = ["F1", "K1", "F2", "K2", "F3", "K3", "F4", "K4", "F5", "K5", "F6", "K6", "F7", "K7", 
                      "F8", "K8", "F9", "K9", "F10", "K10", "F11", "K11", "F12", "K12", "F13", "K13", 
                      "F14", "K14", "F15", "K15", "F16", "K16", "F17", "K17", "F18", "K18", "F19", "K19", 
                      "F20", "K20", "F21", "K21", "F22", "K22", "F23", "K23", "F24", "K24"]

    try:
        # Remove all previous participant-testunit combinations
        TestsetUnit.objects.all().delete()
        oErr.Status("Previous testset-testunit combinations have been removed")

        # Remove testsets if the numbers don't add up
        if Testset.objects.count() != cnt_conditions * cnt_sentences * cnt_round:
            oErr.Status("Deleting previous testsets (numbers don't match)")
            Testset.objects.all().delete()

        # Remove speakers above the cnt_speakers count
        if Speaker.objects.count() > cnt_speakers:
            delete_speakers = []
            for idx, obj in enumerate(Speaker.objects.all()):
                if idx >= cnt_speakers:
                    delete_speakers.append(obj.id)
            Speaker.objects.filter(id__in=delete_speakers).delete()

        # Make sure the 48 speakers have the correct ID and gender
        with transaction.atomic():
            for idx, obj in enumerate(Speaker.objects.all()):
                oInfo = SPEAKER_INFO[idx]
                name = "{}".format(oInfo['SpeakerNum'])
                gender = oInfo['Gender']
                if obj.name != name or obj.gender != gender:
                    obj.name = name
                    obj.gender = gender
                    obj.save()

        # Make sure the 48 sentences have the right ID
        with transaction.atomic():
            for idx, obj in enumerate(Sentence.objects.all()):
                name = SENTENCE_NAMES[idx]
                if name != obj.name:
                    obj.name = name
                    obj.save()

        # Check the filenames and calculate them if needed
        if Information.get_kvalue("enrich-filenames") != "done":
            # Walk all testunits
            with transaction.atomic():
                for obj in Testunit.objects.all():
                    obj.fname = obj.get_filename()
                    obj.save()
            Information.set_kvalue("enrich-filenames", "done")
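        # Note: the "enrich-filenames" kvalue above acts as a one-time migration flag:
        # once it has been set to "done", the (presumably expensive) filename
        # calculation is skipped on every later run of this routine.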

        # Create testset for each round
        for round in range(cnt_round):

            # Create test-sets: each testset must contain cnt_sentences test items
            testsets = []

            if method == "pergroup":
                # Method-specific initializations
                cnt_groupsize = 48  # Number of speakers in one group - used to be 6
                # How many testunit items are needed per combination of SpeakerGroup + Ntype?
                cnt_pertestset = cnt_sentences // (cnt_conditions + 12 // cnt_groupsize )
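                # Worked example (assuming 48 speakers, 48 sentences and cnt_conditions=2):
                # 12 // cnt_groupsize == 12 // 48 == 0, so cnt_pertestset == 48 // 2 == 24.
                # Each SpeakerGroup+Ntype combination then holds 48 * 48 = 2304 tunits,
                # i.e. 2304 // 24 == 96 chunks - exactly one chunk per testset below.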

                # Simple testsets
                for i in range(cnt_speakers * cnt_conditions):
                    testsets.append([])

                # Create sets of [cnt_groupsize] speakers
                speaker_group = []
                cnt_speakergroup = cnt_speakers // cnt_groupsize
                spk = [x.id for x in Speaker.objects.all()]
                random.shuffle(spk)
                for spk_idx in range(cnt_speakergroup):
                    start = spk_idx * cnt_groupsize
                    end = start + cnt_groupsize
                    oSet = spk[start:end]
                    speaker_group.append(oSet)

                # Create speakergroup-pn-sets
                idx_testset = 0
                for sg_id, speaker_ids in enumerate(speaker_group):
                    for ntype in ['n', 'p']:
                        # Get all the tunits for this combination of speaker/ntype
                        qs = Testunit.objects.filter(speaker__id__in=speaker_ids, ntype=ntype)

                        # ========== DEBUG ===========
                        #if qs.count() != 288:
                        #    iStop = 1
                        # ============================

                        # Walk them, increment their usage count, and collect their ids
                        tunits = []
                        with transaction.atomic():
                            for obj in qs:
                                obj.count = obj.count + 1
                                obj.save()
                                tunits.append(obj.id)

                        # Randomize the order of the tunit ids
                        random.shuffle(tunits)

                        # Divide this combination of SpeakerGroup + Ntype over the testsets
                        idx_chunk = 0
                        while idx_chunk + cnt_pertestset <= len(tunits):
                            # copy a chunk to the next testset
                            testset = testsets[idx_testset]
                            for idx in range(cnt_pertestset):
                                # ========== DEBUG ===========
                                # oErr.Status("adding tunit item {} of {}".format(idx_chunk+idx, qs.count()))
                                # ============================

                                testset.append( tunits[idx_chunk + idx])
                            # Go to the next testset
                            idx_testset += 1
                            if idx_testset >= len(testsets): 
                                idx_testset = 0

                            # Next chunk 
                            idx_chunk += cnt_pertestset

                # Shuffle each testset
                for testset in testsets:
                    random.shuffle(testset)
 
                # We now have cnt_speakers * cnt_conditions sets of tunits: these are the testsets for this particular round
                with transaction.atomic():
                    for idx, testset in enumerate(testsets):
                        # Get the testset object for this round
                        tsobj = Testset.get_testset(round+1, idx+1)

                        # ========== DEBUG ===========
                        # oErr.Status("round {} testset {}".format(round+1, idx+1))
                        # ============================

                        # Add testsets to this particular round
                        qs = Testunit.objects.filter(id__in=testset)
                        for tunit in qs:
                            tunit.testsets.add(tsobj)

            elif method == "quadro":
                # Divide groups of 4 test items of one speaker over the testsets

                # Simple testsets
                for i in range(cnt_speakers * cnt_conditions):
                    testsets.append([])
                idx_testset = 0
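                # Worked example (assuming 48 speakers, 48 sentences and 2 ntypes):
                # each speaker/ntype combination has 48 tunits, i.e. 12 chunks of 4;
                # 48 speakers * 2 ntypes * 12 chunks = 1152 chunks, spread round-robin
                # over the 96 testsets, gives 12 chunks (48 items) per testset.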

                # Prepare NP-type
                nptypes_odd = ['n', 'p']
                nptypes_even = ['p', 'n']

                # Iterate over random speakers
                lst_spk = [x.id for x in Speaker.objects.all()]
                random.shuffle(lst_spk)
                for idx_spk, spk in enumerate(lst_spk):
                    # Determine the order of NP type
                    nptypes = nptypes_odd if idx_spk % 2 == 0 else nptypes_even
                    for ntype in nptypes:
                        # Determine the set of speaker-nptype
                        lst_tunit = [x.id for x in Testunit.objects.filter(speaker__id=spk, ntype=ntype)]
                        # Shuffle these tunit items
                        random.shuffle(lst_tunit)
                        # Copy every four of them in consecutive testsets
                        number = len(lst_tunit) // 4
                        for idx in range(number):
                            start = idx * 4
                            for add in range(4):
                                testsets[idx_testset].append(lst_tunit[start+add])
                            # Go to the next testset
                            idx_testset += 1
                            if idx_testset >= len(testsets):
                                idx_testset = 0
                        
                # Shuffle each testset
                for testset in testsets:
                    random.shuffle(testset)
 
                # We now have cnt_speakers * cnt_conditions sets of tunits: these are the testsets for this particular round
                with transaction.atomic():
                    for idx, testset in enumerate(testsets):
                        # Get the testset object for this round
                        tsobj = Testset.get_testset(round+1, idx+1)

                        # ========== DEBUG ===========
                        oErr.Status("round {} testset {}".format(round+1, idx+1))
                        # ============================

                        # Add testsets to this particular round
                        qs = Testunit.objects.filter(id__in=testset)
                        for tunit in qs:
                            # tunit.testsets.add(tsobj)
                            TestsetUnit.objects.create(testunit=tunit, testset=tsobj)

            elif method == "stochastic" and cnt_speakers == cnt_sentences:
                # Each person:
                #   1. must hear all 48 different sentences
                #   2. and these sentences must be of 48 different speakers
                cnt_participants = cnt_speakers * 2
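                # With 48 speakers this amounts to 96 participants; each of the
                # cnt_speakers iterations below yields two testsets (one per ntype
                # order), so the number of testsets should equal cnt_participants.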

                tunits = []

                testsets = []

                # (1) Get the speaker ids
                speakers = [x.id for x in Speaker.objects.all()]

                # (2a) Prepare what is available for each sentence
                sentences = [x.id for x in Sentence.objects.all()]

                # (2b) Walk all sentences
                sent_set = []

                speaker_start = 0
                speaker_sets = []

                # Get a list of speakers with which I am allowed to start
                first_speaker = copy.copy(speakers)

                # (3) Start creating sets for Participants, based on speaker/sentence
                for ptcp in range(0, cnt_speakers):
                    # Create a random row of speaker indexes - but these too must differ from what we had
                    bFound = False
                    while not bFound:
                        speaker_set = list(range(cnt_speakers))
                        random.shuffle(speaker_set)
                        bFound = speaker_set not in speaker_sets
                    speaker_sets.append(speaker_set)

                    # Create a new 'random' latin-square of [speaker/sentence] *indexes*
                    list_of_rows = latin_square2(speaker_set)
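                    # Assumption about latin_square2: given a permutation such as
                    # [2, 0, 1], it presumably returns the rows of a latin square
                    # built on it, e.g. [[2, 0, 1], [0, 1, 2], [1, 2, 0]], so that
                    # every index occurs exactly once in each row and each column.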

                    for ntype in ['n', 'p']:
                        # One time we go through the square [n-p], next time [p-n]

                        # Room for the testset for this participant
                        testset = []

                        for idx_sent, sent in enumerate(sentences):
                            # Walk all the speakers in this sentence
                            for idx_spk in range(cnt_speakers):
                                # Get the speaker identifier from the square
                                speaker_id = speakers[list_of_rows[idx_sent][idx_spk]]

                                # Switch to the other noise type for every next speaker
                                # (note: this rebinds the outer ntype loop variable)
                                ntype = "n" if ntype == "p" else "p"

                                tunit = Testunit.objects.filter(speaker=speaker_id, sentence=sent, ntype=ntype).first()
                                testset.append(tunit.id)

                                # Sanity check: every tunit should be scheduled exactly once
                                if tunit.id in tunits:
                                    iStop = 1   # duplicate found - breakpoint anchor for debugging
                                else:
                                    tunits.append(tunit.id)

                        # We have a testset for this participant and ntype order
                        random.shuffle(testset)
                        testsets.append(testset)

                    # ========== DEBUG ===========
                    oErr.Status("round {} testset {}".format(round+1, ptcp+1))
                    # ============================


                # We now have 156 sets of 48 tunits: these are the testsets for this particular round
                with transaction.atomic():
                    for idx, testset in enumerate(testsets):
                        # Get the testset object for this round
                        tsobj = Testset.get_testset(round+1, idx+1)

                        # ========== DEBUG ===========
                        oErr.Status("round {} testset {}".format(round+1, idx+1))
                        # ============================

                        # Add testsets to this particular round
                        qs = Testunit.objects.filter(id__in=testset)
                        for tunit in qs:
                            TestsetUnit.objects.create(testunit=tunit, testset=tsobj)

            elif method == "square" and cnt_speakers == cnt_sentences:
                """Simplere method when there are as many speakers as sentences"""

                # Simple testsets
                testsets = []
                idx_testset = 0
                tunits = []
                np_division = ['n','n','n','p','p','p']

                # (1) Get the speaker ids
                speakers = [x.id for x in Speaker.objects.all()]
                speakers_m = [x.id for x in Speaker.objects.filter(gender="m")]
                speakers_f = [x.id for x in Speaker.objects.filter(gender="f")]
                oSpeakerSet = Speakerset()
                oSpeakerSet.initialize(speakers_m, speakers_f, 2, 4)
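                # Assumed meaning of Speakerset.initialize(males, females, n_m, n_f):
                # the object is presumably primed to hand out sets of 2 male and
                # 4 female speaker ids on each get_speaker_set() call, without
                # repeating speakers until the pools are exhausted.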

                # Create 8 speaker sets of 4 F + 2 M each (random): 8 sets * 6 speakers = 48 speakers
                speakersets = []
                for idx in range(8):
                    speakersets.append(oSpeakerSet.get_speaker_set())

                # (2a) Prepare what is available for each sentence
                sentences = [x.id for x in Sentence.objects.all()]

                # Iterate over the speakersets
                for speakerset in speakersets:
                    # Create a random order of sentences
                    snt_order = list(sentences)
                    random.shuffle(snt_order)

                    # Start two LISTS (!) of test sets; each list gets 6 testsets
                    testsets_A = [ [], [], [], [], [], [] ]
                    testsets_B = [ [], [], [], [], [], [] ]

                    # Walk the 8 sets of 6 sentences (48 in total)
                    for set_idx in range(8):
                        # We now have a set of 6 sentences divided over 6 people: randomize them
                        blob_start = list(range(6))
                        blob_list = latin_square2(blob_start)

                        # Re-arrange np's
                        random.shuffle(np_division)

                        # Walk the sentences in this 6*6 blob
                        for snt_idx, blob in enumerate(blob_list):
                            # Get this sentence number
                            sentence = snt_order[set_idx * 6 + snt_idx]
                            # Get the ntype
                            ntype = np_division[snt_idx]
                            # Walk the blob itself
                            for idx_ts, col_part in enumerate(blob):
                                # The col_part determines the speaker
                                speaker = speakerset[col_part]

                                # The [idx_ts] is the index for testsets_a and testsets_b

                                # Add item to testset_a
                                tunit = Testunit.objects.filter(speaker__id=speaker, sentence=sentence, ntype=ntype).first()
                                testsets_A[idx_ts].append(tunit.id)
                                if tunit.id in tunits:
                                    iStop = 1   # duplicate - debug breakpoint anchor
                                else:
                                    tunits.append(tunit.id)

                                # Calculate the other Ntype
                                ntype = "n" if ntype == "p" else "p"
                                tunit = Testunit.objects.filter(speaker__id=speaker, sentence=sentence, ntype=ntype).first()
                                testsets_B[idx_ts].append(tunit.id)
                                if tunit.id in tunits:
                                    iStop = 1   # duplicate - debug breakpoint anchor
                                else:
                                    tunits.append(tunit.id)
                        
                        # The 6*6 blob has been treated: continue with the next set of 6 sentences

                    # Add the testsets to the list of testsets
                    for idx in range(6):
                        testsets.append(copy.copy(testsets_A[idx]))
                        testsets.append(copy.copy(testsets_B[idx]))
                    # Adjust the number of testsets produced: we have made twice 6 testsets in one sweep
                    idx_testset += 2 * 6
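                    # Running total: every speakerset sweep yields 6 A-sets plus
                    # 6 B-sets, so after the 8 speakersets idx_testset reaches
                    # 8 * 12 = 96 testsets.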

                    # ========== DEBUG ===========
                    oErr.Status("round {} testset {}".format(round+1, idx_testset+1))
                    # ============================

                # Shuffle each testset
                for testset in testsets:
                    random.shuffle(testset)
 
                # We now have 96 (2 * 48) sets of 48 tunits: these are the testsets for this particular round
                with transaction.atomic():
                    for idx, testset in enumerate(testsets):
                        # Get the testset object for this round
                        tsobj = Testset.get_testset(round+1, idx+1)

                        # ========== DEBUG ===========
                        oErr.Status("round {} testset {}".format(round+1, idx+1))
                        # ============================

                        # Add testsets to this particular round
                        qs = Testunit.objects.filter(id__in=testset)
                        for tunit in qs:
                            # tunit.testsets.add(tsobj)
                            TestsetUnit.objects.create(testunit=tunit, testset=tsobj)
                        # Debugging: report the overall counts
                        oErr.Status("Testunits: {} TestsetUnits: {}".format(
                            Testunit.objects.count(), TestsetUnit.objects.count()))

            elif method == "gendered":
                # Divide the test items of one speaker set over two testsets
                # (each testset gets 6 speakers with 8 sentences each, 6*8=48)

                # Simple testsets
                for i in range(cnt_speakers * cnt_conditions):
                    testsets.append([])
                idx_testset = 0
                tunits = []

                # (1) Get the speaker ids
                speakers = [x.id for x in Speaker.objects.all()]
                speakers_m = [x.id for x in Speaker.objects.filter(gender="m")]
                speakers_f = [x.id for x in Speaker.objects.filter(gender="f")]
                oSpeakerSet = Speakerset()
                oSpeakerSet.initialize(speakers_m, speakers_f, 2, 4)

                # (2) Prepare what is available for each sentence
                sentences = [x.id for x in Sentence.objects.all()]

                # (3) Each speaker gets a stack of sentence IDs
                stacks = []
                for idx in range(cnt_speakers):
                    sntcs = [sentences[x] for x in range(cnt_sentences)]
                    stacks.append(sntcs)

                # (4) Walk all the testsets (should be 96 testsets: 48 * 2)
                for idx_testset in range(cnt_speakers):
                    # Divide into testset A and testset B
                    testset_A = testsets[idx_testset * 2]
                    testset_B = testsets[idx_testset * 2 + 1]

                    # (5) Get a set of 6 speakers (4 female / 2 male) for this testset
                    lst_spkr = oSpeakerSet.get_speaker_set()
                    idx_spkr = -1
                    np_division = ['n','n','n','n','p','p','p','p']
                    idx_division = 0
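                    # Arithmetic for this testset (assuming 48 sentences): each
                    # speaker covers 8 consecutive sentences (idx % 8 below), so the
                    # 6 speakers of the set cover 6 * 8 = 48 sentences; np_division
                    # holds 4 'n' and 4 'p' labels, one per sentence of a speaker.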

                    # (6) walk all the sentences for this testset
                    for idx, sentence in enumerate(sentences):
                        # Get the speaker id for this sentence
                        if idx % 8 == 0:
                            # Go to the next speaker
                            idx_spkr += 1
                            random.shuffle(np_division)
                            idx_division = 0
                        speaker = lst_spkr[idx_spkr]
                        # Assign this sentence's variants to the two testsets
                        # (idx_division is advanced once per sentence, further down)
                        ntype = np_division[idx_division]
                        tunit = Testunit.objects.filter(speaker__id=speaker, sentence=sentence, ntype=ntype).first()
                        testset_A.append(tunit.id)
                        if tunit.id in tunits:
                            iStop = 1   # duplicate - debug breakpoint anchor
                        else:
                            tunits.append(tunit.id)

                        # Calculate the other Ntype
                        ntype = "n" if ntype == "p" else "p"
                        tunit = Testunit.objects.filter(speaker__id=speaker, sentence=sentence, ntype=ntype).first()
                        testset_B.append(tunit.id)
                        if tunit.id in tunits:
                            iStop = 1   # duplicate - debug breakpoint anchor
                        else:
                            tunits.append(tunit.id)

                        # Make sure we cycle through the np division
                        idx_division += 1
                        if idx_division >= len(np_division): idx_division = 0

                    # ========== DEBUG ===========
                    oErr.Status("round {} testset {}".format(round+1, idx_testset+1))
                    # ============================

                    # (the for-loop itself advances idx_testset to the next testset)


                # Shuffle each testset
                for testset in testsets:
                    random.shuffle(testset)
 
                # We now have cnt_speakers * cnt_conditions sets of tunits: these are the testsets for this particular round
                with transaction.atomic():
                    for idx, testset in enumerate(testsets):
                        # Get the testset object for this round
                        tsobj = Testset.get_testset(round+1, idx+1)

                        # ========== DEBUG ===========
                        oErr.Status("round {} testset {}".format(round+1, idx+1))
                        # ============================

                        # Add testsets to this particular round
                        qs = Testunit.objects.filter(id__in=testset)
                        for tunit in qs:
                            # tunit.testsets.add(tsobj)
                            TestsetUnit.objects.create(testunit=tunit, testset=tsobj)

            elif method == "alternative":
                """ALternative method, where we start with a completely random set every time"""

                testitems = []
                qs = Testunit.objects.all().values('id', 'sentence__id', 'speaker__id', 'ntype', 'speaker__gender')
                for obj in qs:
                    oTitem = TestItem(obj['id'], obj['sentence__id'], obj['speaker__id'], obj['ntype'], obj['speaker__gender'])
                    testitems.append(oTitem)

                # Randomize the testitems
                random.shuffle(testitems)
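                # Strategy: rejection sampling. For every testset we repeatedly try
                # to pick one matching testitem per sentence; ismatch() presumably
                # enforces the running constraints passed to it (the n/p balance via
                # ntype_n/ntype_p and the gender balance via speaker_m/speaker_f).
                # If a sentence cannot be matched, the chosen items are returned to
                # the pool and the whole testset is retried from scratch.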
                
                # Simple testsets
                for i in range(cnt_speakers * cnt_conditions):
                    testsets.append([])
                idx_testset = 0
                tunits = []

                # Prepare what is available for each sentence
                sentences = [x.id for x in Sentence.objects.all()]

                # Walk and populate all testsets
                for idx, testset in enumerate(testsets):
                    bCorrect = False
                    iTrial = 0
                    while not bCorrect:
                        if iTrial > 0:
                            # Restart the testset
                            testset = []
                            # General breakpoint: give up after too many attempts
                            if iTrial > 100000:
                                # A valid division cannot be found in reasonable time
                                return "Could not compose a valid testset within 100000 trials"
                        iTrial += 1
                        # Initialize tracking lists
                        speaker_m = []
                        speaker_f = []
                        ntype_n = 0
                        ntype_p = 0
                        chosen = []
                        # Randomize again
                        random.shuffle(testitems)

                        # Assume we end up correct
                        bCorrect = True

                        # Create the testset from [testitems]
                        for sent in sentences:
                            # Find the next correct occurrence of this sentence in the list
                            bFound = False
                            for idx_titem, oTitem in enumerate(testitems):
                                # Is this a match?
                                if oTitem.ismatch(testitems, sent, ntype_n, ntype_p, speaker_m, speaker_f):
                                    # Bookkeeping
                                    if oTitem.ntype == "n":
                                        ntype_n += 1
                                    else:
                                        ntype_p += 1
                                    # Add the testitem to the testset
                                    testset.append(oTitem.id)
                                    # Pop the testitem
                                    testitems.pop(idx_titem)
                                    chosen.append(oTitem)
                                    bFound = True
                                    break
                            if not bFound:
                                bCorrect = False
                                # Add the testset items back to [testitems]
                                for item in chosen:
                                    testitems.append(item)
                                # ========== DEBUG ===========
                                if iTrial % 100 == 0:
                                    oErr.Status("Retrying: {}".format(iTrial))
                                # ============================
                                break

                    # ========== DEBUG ===========
                    oErr.Status("round {} testset {} retrials={}".format(round+1, idx+1, iTrial))
                    # ============================

                # Shuffle each testset
                for testset in testsets:
                    random.shuffle(testset)

                # We now have cnt_speakers * cnt_conditions sets of tunits: these are the testsets for this particular round
                with transaction.atomic():
                    for idx, testset in enumerate(testsets):
                        # Get the testset object for this round
                        tsobj = Testset.get_testset(round+1, idx+1)

                        # ========== DEBUG ===========
                        oErr.Status("round {} testset {}".format(round+1, idx+1))
                        # ============================

                        # Add testsets to this particular round
                        qs = Testunit.objects.filter(id__in=testset)
                        for tunit in qs:
                            # tunit.testsets.add(tsobj)
                            TestsetUnit.objects.create(testunit=tunit, testset=tsobj)



        # Set the message
        sBack = "Created {} testset rounds".format(cnt_round)
    except:
        msg = oErr.get_error_message()
        oErr.DoError("enrich_experiment")
        sBack = msg

    return sBack